6. Differential abundance

1 INFO

This Rmarkdown document contains code for analysis of differential abundances from microbiome data provided from output from the script 1_Import.Rmd based on output from the DF_GMH_PIPELINE. For analyses of differential abundance we will use the DAtest package (Russel et al., 2018).

2 SETUP

knitr::opts_chunk$set(echo = TRUE)

# Load libraries
library(tidyverse)
library(phyloseq)
library(decontam)
library(pals)
library(ggpubr)
library(rstatix)
library(vegan)
library(reshape2)
library(DAtest)
library(ape)
library(kableExtra)
library(plotly)
library(magick)
library(forcats)

saveRDS(params, "R_objects/comp_params.RDS")

2.0.1 SCRIPTS

These functions are used to merge ASV that are not one of the most abundant, or below a defined threshold, into “Other”

# This function merges anything not in the top (10 by default) most abundant ASVs
merge_less_than_top <- function(pobject, top=10){
  # transform to sample counts to relative values
  transformed <- transform_sample_counts(pobject, function(x) x/sum(x))

    # Orientate and export otu table
  if (taxa_are_rows(transformed)) otu.table <- as.data.frame(otu_table(transformed)) else otu.table <- as.data.frame(t(otu_table(transformed)))
  
  # sort otu table by decreasing abundance
  otu.sort <- otu.table[order(rowMeans(otu.table), decreasing = TRUE),]
  
  # Create list of ASVs to merge
  otu.list <- row.names(otu.sort[(top+1):nrow(otu.sort),])
  if(any("Others" %in% rownames(otu.table))) otu.list <- c(otu.list,"Others")
  
  # perform the merger with the original sample counts
  merged <- merge_taxa(pobject, otu.list, 1)
  
  # change the taxa name
  taxa_names(merged)[taxa_names(merged) %in% otu.list[1]] <- "Others"
  
  # change taxa to "Other" for all levels not being NAs
  for (i in 1:ncol(tax_table(merged))){
    if (sum(!is.na(tax_table(merged)[,i]))){
      tax_table(merged)["Others",i] <- "Others"
    }
  }
  return(merged)
}

# This function merges ASVs that are below a defined ratio, with the default being 0.001 (0.1%)
merge_low_abundance <- function(pobject, threshold=0.001){
  
  # transform to sample counts to relative values
  transformed <- transform_sample_counts(pobject, function(x) x/sum(x))
  
  # Orientate and export otu table
  if (taxa_are_rows(transformed)) otu.table <- as.data.frame(otu_table(transformed)) else otu.table <- as.data.frame(t(otu_table(transformed)))
  
  # Create list of ASVs to merge
  otu.list <- row.names(otu.table[rowMeans(otu.table) < threshold,])
  if(any("Others" %in% rownames(otu.table))) otu.list <- c(otu.list,"Others")

  # perform the merger with the original sample counts
  merged <- merge_taxa(pobject, otu.list, 1)
  
  # change the taxa name
  taxa_names(merged)[taxa_names(merged) %in% otu.list[1]] <- "Others"
  
  # change taxa to "Other" for all levels not being NAs
  for (i in 1:ncol(tax_table(merged))){
    if (sum(!is.na(tax_table(merged)[,i]))){
      tax_table(merged)["Others",i] <- "Others"
    }
  }
  return(merged)
}

merge_prevalence <- function(pobject, variable, min.samples){
  # transform to sample counts to relative values
  if (taxa_are_rows(transformed)) otu.table <- as.data.frame(otu_table(transformed)) else otu.table <- as.data.frame(t(otu_table(transformed)))
  
  # make binary
  otu.table[otu.table != 0] <- 1
  
  # extract metadata
  dat <- data.frame(sample_data(pobject))
  
  # set groups
  vgroup <- unique(dat[,variable])
  
  # create count table
  counts <- data.frame(row.names = row.names(otu.table))
  for (i in seq(length(vgroup))) counts[,vgroup[i]] <- rowSums(otu.table[,dat[,variable]==vgroup[i]])
  
  counts$keep <- ifelse(rowSums(counts) > min.sample*length(vgroup), TRUE, FALSE)
  
  # Create list of ASVs to merge
  otu.list <- row.names(counts[!counts$keep,])
  if(any("Others" %in% rownames(otu.table))) otu.list <- c(otu.list,"Others")

  # perform the merger with the original sample counts
  merged <- merge_taxa(pobject, otu.list, 1)
  
  # change the taxa name
  taxa_names(merged)[taxa_names(merged) %in% otu.list[1]] <- "Others"
  
  # change taxa to "Other" for all levels not being NAs
  for (i in 1:ncol(tax_table(merged))){
    if (sum(!is.na(tax_table(merged)[,i]))){
      tax_table(merged)["Others",i] <- "Others"
    }
  }
  return(merged)
  
}
# save functions
save(merge_less_than_top, merge_low_abundance, file = "scripts/merge_abundance.Rdata")

# clear the environment and release memory
rm(list = ls(all.names = TRUE)) #will clear all objects includes hidden objects.
invisible(gc()) #free up memory and report the memory usage.

3 COMPOSITIONAL DIFFERENCES

The last part of the initial analyses is to compare the composition of the samples and test the differential abundance of individual ASVs or taxa.

3.1 AGGLOMERATE DATA - All data

Depending on the project and data it might be relevant to agglomerate data at species or higher taxonomic level for this type of analysis.

params <- readRDS("R_objects/comp_params.RDS")
# load
load(params$input)

# Remove day 20
phy <- subset_samples(physeq = phy, day %in% c("d0","d08","d12","d16","d21"))

# agglomerate at species and genus level
phy.sp <- tax_glom(phy, taxrank = "Species")
phy.ge <- tax_glom(phy, taxrank = "Genus")
phy.fa <- tax_glom(phy, taxrank = "Family")
phy.or <- tax_glom(phy, taxrank = "Order")
phy.cl <- tax_glom(phy, taxrank = "Class")
phy.ph <- tax_glom(phy, taxrank = "Phylum")

# save agglomerated phyloseq objects
save(phy.sp, phy.ge, phy.fa, phy.or, phy.cl, phy.ph, file = "R_objects/Agglomerated_all.RData")

# clear the environment and release memory
rm(list = ls(all.names = TRUE))
invisible(gc())

3.2 AGGLOMERATE DATA - ITERATIONS

The following section agglomerates the data on taxonomic levels based on subsets of Material (Feces, Ileum, or Cecum), Day of sampling (Day 0, 8, 12, 16, 20, and 21), Feed type (LF = Low fibre, HF = High fibre), and Treatment (CTRL = Control, PFOS = PFOS). Agglomerations are saved in files with a specific naming convention for easy calling in further analysis.

params <- readRDS("R_objects/comp_params.RDS")
# load
load(params$input)

# Remove day 20
phy <- subset_samples(physeq = phy, day %in% c("d0","d08","d12","d16","d21"))

str.agg <- c("Feces","Feces_LF","Feces_HF","Ileum","Ileum_LF","Ileum_HF","Cecum","Cecum_LF","Cecum_HF","Feces_CTRL","Feces_PFOS","Feces_d0","Feces_d08","Feces_d12","Feces_d16","Feces_d21","Feces_d0d08","Feces_d08d21")

for (SUBNAME in str.agg) {
  # Determine material parameter
  if (grepl("Feces",SUBNAME) == TRUE) {
    MTRL <- "Feces"
    mn <- "MTRL"
  } else if (grepl("Cecum", SUBNAME) == TRUE) {
    MTRL <- "Cecum"
    mn <- "MTRL"
  } else if (grepl("Ileum", SUBNAME) == TRUE) {
    MTRL <- "Ileum"
    mn <- "MTRL"
  }
  # Determine parameters for differential abundance on specific days
  if (grepl("_d0d08", SUBNAME) ==TRUE) {
    DAY <- c("d0","d08")
    dn <- "day0+8"
  } else if (grepl("_d08",SUBNAME) == TRUE) {
    DAY <- c("d08")
    dn <- "day8"
  } else if (grepl("_d08d21",SUBNAME) == TRUE) {
    DAY <- c("d08","d21")
    dn <- "day8+21"
  } else if (grepl("_d0",SUBNAME) == TRUE) {
    DAY <- "d0"
    dn <- "day0"
  } else if (grepl("_d12",SUBNAME) == TRUE) {
    DAY <- "d12"
    dn <- "day12"
  } else if (grepl("_d16",SUBNAME) == TRUE) {
    DAY <- "d16"
    dn <- "day16"
  } else if (grepl("_d21",SUBNAME) == TRUE) {
    DAY <- "d21"
    dn <- "day21"
  } else {
    dn <- "all_days"
  }
  # Determine parameters for differential abundance in feed and treatment groups
  if (grepl(paste0(MTRL,"_CTRL"),SUBNAME) == TRUE) {
    TREAT <- "CTRL"
    FEED <- c("LF","HF")
  } else if (grepl(paste0(MTRL,"_PFOS"),SUBNAME) == TRUE) {
    TREAT <- "PFOS"
    FEED <- c("LF","HF")
  } else if (grepl("_LF_CTRL",SUBNAME) == TRUE) {
    TREAT <- "CTRL"
    FEED <- "LF"
  } else if (grepl("_LF_PFOS",SUBNAME) == TRUE) {
    TREAT <- "PFOS"
    FEED <- "LF"
  } else if (grepl("_HF_CTRL",SUBNAME) == TRUE) {
    TREAT <- "CTRL"
    FEED <- "HF"
  } else if (grepl("_HF_PFOS",SUBNAME) == TRUE) {
    TREAT <- "PFOS"
    FEED <- "HF"
  } else if (grepl(paste0(MTRL,"_LF"),SUBNAME) == TRUE) {
    TREAT <- c("CTRL","PFOS")
    FEED <- "LF"
  } else if (grepl(paste0(MTRL,"_HF"),SUBNAME) == TRUE) {
    TREAT <- c("CTRL","PFOS")
    FEED <- "HF"
  } else if (dn == "all_days") {
    TREAT <- c("CTRL","PFOS")
    FEED <- c("LF","HF")
  } else if (exists("dn") == TRUE) {
  } else {
    print("Something went wrong...")
  }

  #Subset data
  if (exists("DAY") == TRUE) {
    phy.sub <- subset_samples(phy, material == MTRL & day %in% DAY) 
  } else if (exists("DAY") == FALSE) {
    phy.sub <- subset_samples(phy, material == MTRL & treatment %in% TREAT & feed %in% FEED) 
  }

# Agglomerate at species and genus level
phy.sp <- tax_glom(phy.sub, taxrank = "Species")
phy.ge <- tax_glom(phy.sub, taxrank = "Genus")
phy.fa <- tax_glom(phy.sub, taxrank = "Family")
phy.or <- tax_glom(phy.sub, taxrank = "Order")
phy.cl <- tax_glom(phy.sub, taxrank = "Class")
phy.ph <- tax_glom(phy.sub, taxrank = "Phylum")

# save agglomerated phyloseq objects

save(phy.sp, phy.ge, phy.fa, phy.or, phy.cl, phy.ph, file = paste0("R_objects/Agglomerated_",SUBNAME,".RData"))
print(paste0("Processed ",SUBNAME," correctly!"))
suppressWarnings(rm(MTRL,mn,DAY,dn,TREAT,FEED,phy.sp, phy.ge, phy.fa, phy.or, phy.cl, phy.ph,phy.sub), classes = "warning")
}
# clear the environment and release memory
rm(list = ls(all.names = TRUE))
invisible(gc())

4 DIFFERENTIAL ABUNDANCE

Differential abundance is tested here based on significant differences in relative abundance in specific taxonomic entries based on groupings of feed (HF and LF) and treatment (CTRL and PFOS).

There are many different methods that can be used to calculate differential abundance, all with their own advantages and disadvantages. Which method to use depends on the data and therefore I will be using DAtest [@DAtest] as described by Russel et al. (2018).

4.1 FECES

In the following we take a look at differential abundance in feces samples, dividing the analyses into Days and type of test: - Test of PFOS effect on taxa abundance (calling SUBNAME = _LF & _HF) - Test of FEED effect on taxa abundance (calling SUBNAME = _CTRL & _PFOS)

We test all on three taxonomic levels: - Species - Genus - Family

4.1.1 DAY 0

Day 0 samples should have no taxonomic differences between Treatment groups while a difference is expected in Feed. Therefore we test only these two variables on the entire dataset (cross analysis should not be necessary).

4.1.1.1 FEED

Testing differential abundance between feeding groups LF = Low fibre, HF = High fibre.

4.1.1.1.1 SPECIES
params <- readRDS("R_objects/comp_params.RDS")

LVL <- "Species" #c("Species","Genus","Family")
MTRL <- "Feces" #c("Feces","Ileum","Cecum")
DAY <- "d0" #c("d0","d08","d12","d16","d20","d21")
PDI <- "feed" #c("treatment","feed")
SUBNAME <- "Feces_d0"
load(paste0("R_objects/Agglomerated_",SUBNAME,".RData"))

# Create sub-folder
  if (!file.exists(paste0("plots/differential_abundance/",SUBNAME))) dir.create(file.path(getwd(), paste0("plots/differential_abundance/",SUBNAME)))

# Filter samples (if needed)
# phy.sp <- subset_samples(phy.sp, material == MTRL)
  if (exists("FEED") == TRUE) {
  phy.sp <- subset_samples(phy.sp, feed == FEED)
  print(paste0("Data contains type ",FEED," feed."))
} else {
  print("Data contains both feed LF and HF.")
}
if (exists("DAY") == TRUE) {
  phy.sp <- subset_samples(phy.sp, day %in% DAY)
  print(paste0("Data contains samples from day ",DAY,"."))
} else {
    print("Data contains all days.")
}
if (exists("TREAT") == TRUE) {
  phy.sp <- subset_samples(phy.sp, treatment == TREAT)
  print(paste0("Data contains only ",TREAT," treatment groups."))
} else {
  print("Data contains all treatment groups.")
}
# Filter data
filt <- preDA(data = phy.sp, min.reads = 20, min.samples = 4)

# Test best method 
filt.test <- testDA(filt, predictor = PDI, effectSize = 10)

# Evaluate the plot and summary table
sum.fil <- summary(filt.test)
write.csv(sum.fil, file = paste0("plots/differential_abundance/",SUBNAME,"/filter_summary_",MTRL,"_",DAY,"_",PDI,"_",LVL,".csv"))
p.fil <- plot(filt.test)
p.fil
pdf(paste0("plots/differential_abundance/",SUBNAME,"/filter_plot_",MTRL,"_",DAY,"_",PDI,"_",LVL,".pdf"))
    p.fil
    dev.off()

Result: TTA (t-test)

RUN DAtest

Having now identified t-test - ALR (tta) to analyse the two groups as the optimal test we will use it to calculate differential abundance. This test is also used for remaining analyses in this section.

# Run the selected analysis
filt.DA <- DA.tta(filt, predictor = PDI)

# Evaluate the plot and summary table
table(filt.DA$pval.adj < 0.05)
filt.p5 <- filt.DA[filt.DA$pval.adj < 0.05,]
write.csv(filt.p5,paste0("plots/differential_abundance/",SUBNAME,"/stats_",MTRL,"_",DAY,"_",PDI,"_",LVL,".csv"))

# Create a subset of the samples
filt.ra <- transform_sample_counts(filt, function(x) x/sum(x)*100)
DA.sig <- prune_taxa(filt.DA$Feature[filt.DA$pval.adj < 0.05], x = filt.ra)

# melt the data
DAm <- psmelt(DA.sig)

# Calculate pseudocount
pseudocount <- min(DAm$Abundance[DAm$Abundance != 0])

# Create plot
p.dif <- ggplot(DAm, aes(x = Species, y = Abundance+pseudocount, fill = fct_rev(feed))) +
  geom_boxplot() + 
  scale_y_log10() + 
  coord_flip() + 
  scale_fill_manual(name = "Feed", values = params$COLFEED, breaks = c("HF","LF")) + 
  facet_grid(Phylum ~ . , scales = "free_y", space = "free") + 
  theme_bw() + 
  labs(fill = "Feed")
p.dT <- p.dif + ggtitle(paste0("Differential abundance on ",LVL," level for ",MTRL," ",DAY))
p.dT

suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",PDI,"_",LVL,".pdf"), p.dT, device = "pdf", units = "mm", width = 250, height = 250, dpi = 300))
suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",PDI,"_",LVL,".png"), p.dT, device = "png", units = "mm", width = 250, height = 250, dpi = 300))

# clear the environment and release memory
rm(list = ls(all.names = TRUE))
invisible(gc())
Figure
Figure
4.1.1.1.2 GENUS
params <- readRDS("R_objects/comp_params.RDS")

LVL <- "Genus" #c("Species","Genus","Family")
MTRL <- "Feces" #c("Feces","Ileum","Cecum")
DAY <- "d0" #c("d0","d08","d12","d16","d20","d21")
#FEED <- "LF" #c("LF","HF")
#TREAT <- "CTRL" #c("CTRL","PFOS")
PDI <- "feed" #c("treatment","feed")
SUBNAME <- "Feces_d0" #c("Feces","Feces_LF","Feces_HF","Feces_CTRL","Feces_PFOS","Feces_LF_CTRL","Feces_LF_PFOS","Feces_HF_CTRL","Feces_HF_PFOS",             "Feces_d0","Feces_d08","Feces_d12","Feces_d16","Feces_d20","Feces_d21","Feces_d0d08","Feces_d08d21",             "Ileum","Ileum_LF","Ileum_HF","Cecum","Cecum_LF","Cecum_HF")
load(paste0("R_objects/Agglomerated_",SUBNAME,".RData"))

# Create sub-folder
  if (!file.exists(paste0("plots/differential_abundance/",SUBNAME))) dir.create(file.path(getwd(), paste0("plots/differential_abundance/",SUBNAME)))

# Filter samples (if needed)
# phy.ge <- subset_samples(phy.ge, material == MTRL)
if (exists("FEED") == TRUE) {
  phy.ge <- subset_samples(phy.ge, feed == FEED)
  print(paste0("Data contains type ",FEED," feed."))
} else {
  print("Data contains both feed LF and HF.")
}
if (exists("DAY") == TRUE) {
  phy.ge <- subset_samples(phy.ge, day %in% DAY)
  print(paste0("Data contains samples from day ",DAY,"."))
} else {
    print("Data contains all days.")
}
if (exists("TREAT") == TRUE) {
  phy.ge <- subset_samples(phy.ge, treatment == TREAT)
  print(paste0("Data contains only ",TREAT," treatment groups."))
} else {
  print("Data contains all treatment groups.")
}

# Filter data
filt <- preDA(data = phy.ge, min.reads = 20, min.samples = 4)

# Run the selected analysis
filt.DA <- DA.tta(filt, predictor = PDI)

# Evaluate the plot and summary table
table(filt.DA$pval.adj < 0.05)
filt.p5 <- filt.DA[filt.DA$pval.adj < 0.05,]
write.csv(filt.p5,paste0("plots/differential_abundance/",SUBNAME,"/stats_",MTRL,"_",DAY,"_",PDI,"_",LVL,".csv"))

# Create a subset of the samples
filt.ra <- transform_sample_counts(filt, function(x) x/sum(x)*100)
DA.sig <- prune_taxa(filt.DA$Feature[filt.DA$pval.adj < 0.05], x = filt.ra)

# melt the data
DAm <- psmelt(DA.sig)

# Create plot
pseudocount <- min(DAm$Abundance[DAm$Abundance != 0])
p.dif <- ggplot(DAm, aes(x = Genus, y = Abundance+pseudocount, fill = fct_rev(feed))) + geom_boxplot() + scale_y_log10() + coord_flip() + scale_fill_manual(values = params$COLFEED, breaks = c("LF","HF")) + facet_grid(Phylum ~ . , scales = "free_y", space = "free") + theme_bw() + labs(fill = "Feed")
p.dT <- p.dif + ggtitle(paste0("Differential abundance on ",LVL," level for ",MTRL," ",DAY))
p.dT

suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",PDI,"_",LVL,".pdf"), p.dT, device = "pdf", units = "mm", width = 150, height = 250, dpi = 300))
suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",PDI,"_",LVL,".png"), p.dT, device = "png", units = "mm", width = 150, height = 250, dpi = 300))

# clear the environment and release memory
rm(list = ls(all.names = TRUE))
invisible(gc())
Figure
Figure
4.1.1.1.3 FAMILY
params <- readRDS("R_objects/comp_params.RDS")

LVL <- "Family" #c("Species","Genus","Family")
MTRL <- "Feces" #c("Feces","Ileum","Cecum")
DAY <- "d0" #c("d0","d08","d12","d16","d20","d21")
#FEED <- "LF" #c("LF","HF")
#TREAT <- "CTRL" #c("CTRL","PFOS")
PDI <- "feed" #c("treatment","feed")
SUBNAME <- "Feces_d0" #c("Feces","Feces_LF","Feces_HF","Feces_CTRL","Feces_PFOS","Feces_LF_CTRL","Feces_LF_PFOS","Feces_HF_CTRL","Feces_HF_PFOS",             "Feces_d0","Feces_d08","Feces_d12","Feces_d16","Feces_d20","Feces_d21","Feces_d0d08","Feces_d08d21",             "Ileum","Ileum_LF","Ileum_HF","Cecum","Cecum_LF","Cecum_HF")
load(paste0("R_objects/Agglomerated_",SUBNAME,".RData"))

# Create sub-folder
  if (!file.exists(paste0("plots/differential_abundance/",SUBNAME))) dir.create(file.path(getwd(), paste0("plots/differential_abundance/",SUBNAME)))

# Filter samples (if needed)
# phy.fa <- subset_samples(phy.fa, material == MTRL)
if (exists("FEED") == TRUE) {
  phy.fa <- subset_samples(phy.fa, feed == FEED)
  print(paste0("Data contains type ",FEED," feed."))
} else {
  print("Data contains both feed LF and HF.")
}
if (exists("DAY") == TRUE) {
  phy.fa <- subset_samples(phy.fa, day %in% DAY)
  print(paste0("Data contains samples from day ",DAY,"."))
} else {
    print("Data contains all days.")
}
if (exists("TREAT") == TRUE) {
  phy.fa <- subset_samples(phy.fa, treatment == TREAT)
  print(paste0("Data contains only ",TREAT," treatment groups."))
} else {
  print("Data contains all treatment groups.")
}

# Filter data
filt <- preDA(data = phy.fa, min.reads = 20, min.samples = 4)

# Run the selected analysis
filt.DA <- DA.ttr(filt, predictor = PDI)

# Evaluate the plot and summary table
table(filt.DA$pval.adj < 0.05)
filt.p5 <- filt.DA[filt.DA$pval.adj < 0.05,]
write.csv(filt.p5,paste0("plots/differential_abundance/",SUBNAME,"/stats_",MTRL,"_",DAY,"_",PDI,"_",LVL,".csv"))

# Create a subset of the samples
filt.ra <- transform_sample_counts(filt, function(x) x/sum(x)*100)
DA.sig <- prune_taxa(filt.DA$Feature[filt.DA$pval.adj < 0.05], x = filt.ra)

# melt the data
DAm <- psmelt(DA.sig)

# Create plot
pseudocount <- min(DAm$Abundance[DAm$Abundance != 0])
p.dif <- ggplot(DAm, aes(x = Family, y = Abundance+pseudocount, fill = fct_rev(feed))) + geom_boxplot() + scale_y_log10() + coord_flip() + scale_fill_manual(values = params$COLFEED, breaks = c("LF","HF")) + facet_grid(Phylum ~ . , scales = "free_y", space = "free") + theme_bw() + labs(fill = "Feed")
p.dT <- p.dif + ggtitle(paste0("Differential abundance on ",LVL," level for ",MTRL," ",DAY))
p.dT

suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",PDI,"_",LVL,".pdf"), p.dT, device = "pdf", units = "mm", width = 150, height = 200, dpi = 300))
suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",PDI,"_",LVL,".png"), p.dT, device = "png", units = "mm", width = 150, height = 200, dpi = 300))

# clear the environment and release memory
rm(list = ls(all.names = TRUE))
invisible(gc())
Figure
Figure

4.1.1.2 TREATMENT in HF

4.1.1.2.1 SPECIES (NO SIGNIFICANCE)
params <- readRDS("R_objects/comp_params.RDS")

LVL <- "Species" #c("Species","Genus","Family")
MTRL <- "Feces" #c("Feces","Ileum","Cecum")
DAY <- "d0" #c("d0","d08","d12","d16","d20","d21")
FEED <- "HF" #c("LF","HF")
#TREAT <- "CTRL" #c("CTRL","PFOS")
PDI <- "treatment" #c("treatment","feed")
SUBNAME <- "Feces_d0" #c("Feces","Feces_LF","Feces_HF","Feces_CTRL","Feces_PFOS","Feces_LF_CTRL","Feces_LF_PFOS","Feces_HF_CTRL","Feces_HF_PFOS",             "Feces_d0","Feces_d08","Feces_d12","Feces_d16","Feces_d20","Feces_d21","Feces_d0d08","Feces_d08d21",             "Ileum","Ileum_LF","Ileum_HF","Cecum","Cecum_LF","Cecum_HF")
load(paste0("R_objects/Agglomerated_",SUBNAME,".RData"))

# Create sub-folder
  if (!file.exists(paste0("plots/differential_abundance/",SUBNAME))) dir.create(file.path(getwd(), paste0("plots/differential_abundance/",SUBNAME)))

# Filter samples (if needed)
# phy.sp <- subset_samples(phy.sp, material == MTRL)
if (exists("FEED") == TRUE) {
  phy.sp <- subset_samples(phy.sp, feed == FEED)
  print(paste0("Data contains type ",FEED," feed."))
} else {
  print("Data contains both feed LF and HF.")
}
if (exists("DAY") == TRUE) {
  phy.sp <- subset_samples(phy.sp, day %in% DAY)
  print(paste0("Data contains samples from day ",DAY,"."))
} else {
    print("Data contains all days.")
}
if (exists("TREAT") == TRUE) {
  phy.sp <- subset_samples(phy.sp, treatment == TREAT)
  print(paste0("Data contains only ",TREAT," treatment groups."))
} else {
  print("Data contains all treatment groups.")
}

# Filter data
filt <- preDA(data = phy.sp, min.reads = 20, min.samples = 4)

# Run the selected analysis
filt.DA <- DA.tta(filt, predictor = PDI)

# Evaluate the plot and summary table
table(filt.DA$pval.adj < 0.05)
filt.p5 <- filt.DA[filt.DA$pval.adj < 0.05,]
write.csv(filt.p5,paste0("plots/differential_abundance/",SUBNAME,"/stats_",MTRL,"_",DAY,"_",FEED,"_",PDI,"_",LVL,".csv"))

if (any(filt.p5$pval.adj > 0)) {
  # Create a subset of the samples
  filt.ra <- transform_sample_counts(filt, function(x) x/sum(x)*100)
  DA.sig <- prune_taxa(filt.DA$Feature[filt.DA$pval.adj < 0.05], x = filt.ra)
  
  # melt the data
  DAm <- psmelt(DA.sig)
  
  # Create plot
  pseudocount <- min(DAm$Abundance[DAm$Abundance != 0])
  p.dif <- ggplot(DAm, aes(x = Species, y = Abundance+pseudocount, fill = fct_rev(feed_treat))) + geom_boxplot() + scale_y_log10() + coord_flip() + scale_fill_manual(values = params$COL1, name = "Treatment", labels = c("Control","PFOS"), breaks = c("LF_CTRL","LF_PFOS")) + facet_grid(Phylum ~ . , scales = "free_y", space = "free") + theme_bw()
  p.dT <- p.dif + ggtitle(paste0("Differential abundance on ",LVL," level for ",MTRL," ",DAY," ",FEED))
  print(p.dT)
  
  suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",FEED,"_",PDI,"_",LVL,".pdf"), p.dT, device = "pdf", units = "mm", width = 350, height = 200, dpi = 300))
  suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",FEED,"_",PDI,"_",LVL,".png"), p.dT, device = "png", units = "mm", width = 350, height = 200, dpi = 300))
} else {
  if (exists("FEED") == TRUE) {
  message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for feed ",FEED,"."))
  } else if (exists("TREAT") == TRUE) {
  message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for treatment ",TREAT,"."))
  }
}

# clear the environment and release memory
rm(list = ls(all.names = TRUE))
invisible(gc())
4.1.1.2.2 GENUS (NO SIGNIFICANCE)
params <- readRDS("R_objects/comp_params.RDS")

LVL <- "Genus" #c("Species","Genus","Family")
MTRL <- "Feces" #c("Feces","Ileum","Cecum")
DAY <- "d0" #c("d0","d08","d12","d16","d20","d21")
FEED <- "HF" #c("LF","HF")
#TREAT <- "CTRL" #c("CTRL","PFOS")
PDI <- "treatment" #c("treatment","feed")
SUBNAME <- "Feces_d0" #c("Feces","Feces_LF","Feces_HF","Feces_CTRL","Feces_PFOS","Feces_LF_CTRL","Feces_LF_PFOS","Feces_HF_CTRL","Feces_HF_PFOS",             "Feces_d0","Feces_d08","Feces_d12","Feces_d16","Feces_d20","Feces_d21","Feces_d0d08","Feces_d08d21",             "Ileum","Ileum_LF","Ileum_HF","Cecum","Cecum_LF","Cecum_HF")
load(paste0("R_objects/Agglomerated_",SUBNAME,".RData"))

# Create sub-folder
  if (!file.exists(paste0("plots/differential_abundance/",SUBNAME))) dir.create(file.path(getwd(), paste0("plots/differential_abundance/",SUBNAME)))

# Filter samples (if needed)
# phy.ge <- subset_samples(phy.ge, material == MTRL)
if (exists("FEED") == TRUE) {
  phy.ge <- subset_samples(phy.ge, feed == FEED)
  print(paste0("Data contains type ",FEED," feed."))
} else {
  print("Data contains both feed LF and HF.")
}
if (exists("DAY") == TRUE) {
  phy.ge <- subset_samples(phy.ge, day %in% DAY)
  print(paste0("Data contains samples from day ",DAY,"."))
} else {
    print("Data contains all days.")
}
if (exists("TREAT") == TRUE) {
  phy.ge <- subset_samples(phy.ge, treatment == TREAT)
  print(paste0("Data contains only ",TREAT," treatment groups."))
} else {
  print("Data contains all treatment groups.")
}

# Filter data
filt <- preDA(data = phy.ge, min.reads = 20, min.samples = 4)

# Run the selected analysis
filt.DA <- DA.tta(filt, predictor = PDI)

# Evaluate the plot and summary table
table(filt.DA$pval.adj < 0.05)
filt.p5 <- filt.DA[filt.DA$pval.adj < 0.05,]
write.csv(filt.p5,paste0("plots/differential_abundance/",SUBNAME,"/stats_",MTRL,"_",DAY,"_",FEED,"_",PDI,"_",LVL,".csv"))

if (any(filt.p5$pval.adj > 0)) {
  # Create a subset of the samples
  filt.ra <- transform_sample_counts(filt, function(x) x/sum(x)*100)
  DA.sig <- prune_taxa(filt.DA$Feature[filt.DA$pval.adj < 0.05], x = filt.ra)
  
  # melt the data
  DAm <- psmelt(DA.sig)
  
  # Create plot
  pseudocount <- min(DAm$Abundance[DAm$Abundance != 0])
  p.dif <- ggplot(DAm, aes(x = Genus, y = Abundance+pseudocount, fill = fct_rev(feed_treat))) + geom_boxplot() + scale_y_log10() + coord_flip() + scale_fill_manual(values = params$COL1, name = "Treatment", labels = c("Control","PFOS"), breaks = c("LF_CTRL","LF_PFOS")) + facet_grid(Phylum ~ . , scales = "free_y", space = "free") + theme_bw()
  p.dT <- p.dif + ggtitle(paste0("Differential abundance on ",LVL," level for ",MTRL," ",DAY," ",FEED))
  print(p.dT)
  
  suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",FEED,"_",PDI,"_",LVL,".pdf"), p.dT, device = "pdf", units = "mm", width = 350, height = 200, dpi = 300))
  suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",FEED,"_",PDI,"_",LVL,".png"), p.dT, device = "png", units = "mm", width = 350, height = 200, dpi = 300))
} else {
  if (exists("FEED") == TRUE) {
  message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for feed ",FEED,"."))
  } else if (exists("TREAT") == TRUE) {
  message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for treatment ",TREAT,"."))
  }
}

# clear the environment and release memory
rm(list = ls(all.names = TRUE))
invisible(gc())
4.1.1.2.3 FAMILY (NO SIGNIFICANCE)
params <- readRDS("R_objects/comp_params.RDS")

LVL <- "Family" #c("Species","Genus","Family")
MTRL <- "Feces" #c("Feces","Ileum","Cecum")
DAY <- "d0" #c("d0","d08","d12","d16","d20","d21")
FEED <- "HF" #c("LF","HF")
#TREAT <- "CTRL" #c("CTRL","PFOS")
PDI <- "treatment" #c("treatment","feed")
SUBNAME <- "Feces_d0" #c("Feces","Feces_LF","Feces_HF","Feces_CTRL","Feces_PFOS","Feces_LF_CTRL","Feces_LF_PFOS","Feces_HF_CTRL","Feces_HF_PFOS",             "Feces_d0","Feces_d08","Feces_d12","Feces_d16","Feces_d20","Feces_d21","Feces_d0d08","Feces_d08d21",             "Ileum","Ileum_LF","Ileum_HF","Cecum","Cecum_LF","Cecum_HF")
load(paste0("R_objects/Agglomerated_",SUBNAME,".RData"))

# Create sub-folder
  if (!file.exists(paste0("plots/differential_abundance/",SUBNAME))) dir.create(file.path(getwd(), paste0("plots/differential_abundance/",SUBNAME)))

# Filter samples (if needed)
# phy.fa <- subset_samples(phy.fa, material == MTRL)
if (exists("FEED") == TRUE) {
  phy.fa <- subset_samples(phy.fa, feed == FEED)
  print(paste0("Data contains type ",FEED," feed."))
} else {
  print("Data contains both feed LF and HF.")
}
if (exists("DAY") == TRUE) {
  phy.fa <- subset_samples(phy.fa, day %in% DAY)
  print(paste0("Data contains samples from day ",DAY,"."))
} else {
    print("Data contains all days.")
}
if (exists("TREAT") == TRUE) {
  phy.fa <- subset_samples(phy.fa, treatment == TREAT)
  print(paste0("Data contains only ",TREAT," treatment groups."))
} else {
  print("Data contains all treatment groups.")
}

# Filter data
filt <- preDA(data = phy.fa, min.reads = 20, min.samples = 4)

# Run the selected analysis
filt.DA <- DA.ttc(filt, predictor = PDI)

# Evaluate the plot and summary table
table(filt.DA$pval.adj < 0.05)
filt.p5 <- filt.DA[filt.DA$pval.adj < 0.05,]
write.csv(filt.p5,paste0("plots/differential_abundance/",SUBNAME,"/stats_",MTRL,"_",DAY,"_",FEED,"_",PDI,"_",LVL,".csv"))

if (any(filt.p5$pval.adj > 0)) {
  # Create a subset of the samples
  filt.ra <- transform_sample_counts(filt, function(x) x/sum(x)*100)
  DA.sig <- prune_taxa(filt.DA$Feature[filt.DA$pval.adj < 0.05], x = filt.ra)
  
  # melt the data
  DAm <- psmelt(DA.sig)
  
  # Create plot
  pseudocount <- min(DAm$Abundance[DAm$Abundance != 0])
  p.dif <- ggplot(DAm, aes(x = Family, y = Abundance+pseudocount, fill = fct_rev(feed_treat))) + geom_boxplot() + scale_y_log10() + coord_flip() + scale_fill_manual(values = params$COL1, name = "Treatment", labels = c("Control","PFOS"), breaks = c("LF_CTRL","LF_PFOS")) + facet_grid(Phylum ~ . , scales = "free_y", space = "free") + theme_bw()
  p.dT <- p.dif + ggtitle(paste0("Differential abundance on ",LVL," level for ",MTRL," ",DAY," ",FEED))
  print(p.dT)
  
  suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",FEED,"_",PDI,"_",LVL,".pdf"), p.dT, device = "pdf", units = "mm", width = 350, height = 200, dpi = 300))
  suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",FEED,"_",PDI,"_",LVL,".png"), p.dT, device = "png", units = "mm", width = 350, height = 200, dpi = 300))
} else {
  if (exists("FEED") == TRUE) {
  message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for feed ",FEED,"."))
  } else if (exists("TREAT") == TRUE) {
  message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for treatment ",TREAT,"."))
  }
}

# clear the environment and release memory
rm(list = ls(all.names = TRUE))
invisible(gc())

4.1.1.3 TREATMENT in LF

4.1.1.3.1 SPECIES (NO SIGNIFICANCE)
params <- readRDS("R_objects/comp_params.RDS")

LVL <- "Species" #c("Species","Genus","Family")
MTRL <- "Feces" #c("Feces","Ileum","Cecum")
DAY <- "d0" #c("d0","d08","d12","d16","d20","d21")
FEED <- "LF" #c("LF","HF")
#TREAT <- "CTRL" #c("CTRL","PFOS")
PDI <- "treatment" #c("treatment","feed")
SUBNAME <- "Feces_d0" #c("Feces","Feces_LF","Feces_HF","Feces_CTRL","Feces_PFOS","Feces_LF_CTRL","Feces_LF_PFOS","Feces_HF_CTRL","Feces_HF_PFOS",             "Feces_d0","Feces_d08","Feces_d12","Feces_d16","Feces_d20","Feces_d21","Feces_d0d08","Feces_d08d21",             "Ileum","Ileum_LF","Ileum_HF","Cecum","Cecum_LF","Cecum_HF")
load(paste0("R_objects/Agglomerated_",SUBNAME,".RData"))

# Create sub-folder
  if (!file.exists(paste0("plots/differential_abundance/",SUBNAME))) dir.create(file.path(getwd(), paste0("plots/differential_abundance/",SUBNAME)))

# Filter samples (if needed)
# phy.sp <- subset_samples(phy.sp, material == MTRL)
if (exists("FEED") == TRUE) {
  phy.sp <- subset_samples(phy.sp, feed == FEED)
  print(paste0("Data contains type ",FEED," feed."))
} else {
  print("Data contains both feed LF and HF.")
}
if (exists("DAY") == TRUE) {
  phy.sp <- subset_samples(phy.sp, day %in% DAY)
  print(paste0("Data contains samples from day ",DAY,"."))
} else {
    print("Data contains all days.")
}
if (exists("TREAT") == TRUE) {
  phy.sp <- subset_samples(phy.sp, treatment == TREAT)
  print(paste0("Data contains only ",TREAT," treatment groups."))
} else {
  print("Data contains all treatment groups.")
}

# Filter data
filt <- preDA(data = phy.sp, min.reads = 20, min.samples = 4)

# Run the selected analysis
filt.DA <- DA.tta(filt, predictor = PDI)

# Evaluate the plot and summary table
table(filt.DA$pval.adj < 0.05)
filt.p5 <- filt.DA[filt.DA$pval.adj < 0.05,]
write.csv(filt.p5,paste0("plots/differential_abundance/",SUBNAME,"/stats_",MTRL,"_",DAY,"_",FEED,"_",PDI,"_",LVL,".csv"))

if (any(filt.p5$pval.adj > 0)) {
  # Create a subset of the samples
  filt.ra <- transform_sample_counts(filt, function(x) x/sum(x)*100)
  DA.sig <- prune_taxa(filt.DA$Feature[filt.DA$pval.adj < 0.05], x = filt.ra)
  
  # melt the data
  DAm <- psmelt(DA.sig)
  
  # Create plot
  pseudocount <- min(DAm$Abundance[DAm$Abundance != 0])
  p.dif <- ggplot(DAm, aes(x = Species, y = Abundance+pseudocount, fill = fct_rev(feed_treat))) + geom_boxplot() + scale_y_log10() + coord_flip() + scale_fill_manual(values = params$COL1, name = "Treatment", labels = c("Control","PFOS"), breaks = c("LF_CTRL","LF_PFOS")) + facet_grid(Phylum ~ . , scales = "free_y", space = "free") + theme_bw()
  p.dT <- p.dif + ggtitle(paste0("Differential abundance on ",LVL," level for ",MTRL," ",DAY," ",FEED))
  print(p.dT)
  
  suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",FEED,"_",PDI,"_",LVL,".pdf"), p.dT, device = "pdf", units = "mm", width = 350, height = 200, dpi = 300))
  suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",FEED,"_",PDI,"_",LVL,".png"), p.dT, device = "png", units = "mm", width = 350, height = 200, dpi = 300))
} else {
  if (exists("FEED") == TRUE) {
  message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for feed ",FEED,"."))
  } else if (exists("TREAT") == TRUE) {
  message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for treatment ",TREAT,"."))
  }
}

# clear the environment and release memory
rm(list = ls(all.names = TRUE))
invisible(gc())
4.1.1.3.2 GENUS (NO SIGNIFICANCE)
params <- readRDS("R_objects/comp_params.RDS")

LVL <- "Genus" #c("Species","Genus","Family")
MTRL <- "Feces" #c("Feces","Ileum","Cecum")
DAY <- "d0" #c("d0","d08","d12","d16","d20","d21")
FEED <- "LF" #c("LF","HF")
#TREAT <- "CTRL" #c("CTRL","PFOS")
PDI <- "treatment" #c("treatment","feed")
SUBNAME <- "Feces_d0" #c("Feces","Feces_LF","Feces_HF","Feces_CTRL","Feces_PFOS","Feces_LF_CTRL","Feces_LF_PFOS","Feces_HF_CTRL","Feces_HF_PFOS",             "Feces_d0","Feces_d08","Feces_d12","Feces_d16","Feces_d20","Feces_d21","Feces_d0d08","Feces_d08d21",             "Ileum","Ileum_LF","Ileum_HF","Cecum","Cecum_LF","Cecum_HF")
load(paste0("R_objects/Agglomerated_",SUBNAME,".RData"))

# Create sub-folder
  if (!file.exists(paste0("plots/differential_abundance/",SUBNAME))) dir.create(file.path(getwd(), paste0("plots/differential_abundance/",SUBNAME)))

# Filter samples (if needed)
# phy.ge <- subset_samples(phy.ge, material == MTRL)
if (exists("FEED") == TRUE) {
  phy.ge <- subset_samples(phy.ge, feed == FEED)
  print(paste0("Data contains type ",FEED," feed."))
} else {
  print("Data contains both feed LF and HF.")
}
if (exists("DAY") == TRUE) {
  phy.ge <- subset_samples(phy.ge, day %in% DAY)
  print(paste0("Data contains samples from day ",DAY,"."))
} else {
    print("Data contains all days.")
}
if (exists("TREAT") == TRUE) {
  phy.ge <- subset_samples(phy.ge, treatment == TREAT)
  print(paste0("Data contains only ",TREAT," treatment groups."))
} else {
  print("Data contains all treatment groups.")
}

# Filter data
filt <- preDA(data = phy.ge, min.reads = 20, min.samples = 4)

# Run the selected analysis
filt.DA <- DA.tta(filt, predictor = PDI)

# Evaluate the plot and summary table
table(filt.DA$pval.adj < 0.05)
filt.p5 <- filt.DA[filt.DA$pval.adj < 0.05,]
write.csv(filt.p5,paste0("plots/differential_abundance/",SUBNAME,"/stats_",MTRL,"_",DAY,"_",FEED,"_",PDI,"_",LVL,".csv"))

if (any(filt.p5$pval.adj > 0)) {
  # Create a subset of the samples
  filt.ra <- transform_sample_counts(filt, function(x) x/sum(x)*100)
  DA.sig <- prune_taxa(filt.DA$Feature[filt.DA$pval.adj < 0.05], x = filt.ra)
  
  # melt the data
  DAm <- psmelt(DA.sig)
  
  # Create plot
  pseudocount <- min(DAm$Abundance[DAm$Abundance != 0])
  p.dif <- ggplot(DAm, aes(x = Genus, y = Abundance+pseudocount, fill = fct_rev(feed_treat))) + geom_boxplot() + scale_y_log10() + coord_flip() + scale_fill_manual(values = params$COL1, name = "Treatment", labels = c("Control","PFOS"), breaks = c("LF_CTRL","LF_PFOS")) + facet_grid(Phylum ~ . , scales = "free_y", space = "free") + theme_bw()
  p.dT <- p.dif + ggtitle(paste0("Differential abundance on ",LVL," level for ",MTRL," ",DAY," ",FEED))
  print(p.dT)
  
  suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",FEED,"_",PDI,"_",LVL,".pdf"), p.dT, device = "pdf", units = "mm", width = 350, height = 200, dpi = 300))
  suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",FEED,"_",PDI,"_",LVL,".png"), p.dT, device = "png", units = "mm", width = 350, height = 200, dpi = 300))
} else {
  if (exists("FEED") == TRUE) {
  message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for feed ",FEED,"."))
  } else if (exists("TREAT") == TRUE) {
  message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for treatment ",TREAT,"."))
  }
}

# clear the environment and release memory
rm(list = ls(all.names = TRUE))
invisible(gc())
4.1.1.3.3 FAMILY (NO SIGNIFICANCE)
params <- readRDS("R_objects/comp_params.RDS")

LVL <- "Family" #c("Species","Genus","Family")
MTRL <- "Feces" #c("Feces","Ileum","Cecum")
DAY <- "d0" #c("d0","d08","d12","d16","d20","d21")
FEED <- "LF" #c("LF","HF")
#TREAT <- "CTRL" #c("CTRL","PFOS")
PDI <- "treatment" #c("treatment","feed")
SUBNAME <- "Feces_d0" #c("Feces","Feces_LF","Feces_HF","Feces_CTRL","Feces_PFOS","Feces_LF_CTRL","Feces_LF_PFOS","Feces_HF_CTRL","Feces_HF_PFOS",             "Feces_d0","Feces_d08","Feces_d12","Feces_d16","Feces_d20","Feces_d21","Feces_d0d08","Feces_d08d21",             "Ileum","Ileum_LF","Ileum_HF","Cecum","Cecum_LF","Cecum_HF")
load(paste0("R_objects/Agglomerated_",SUBNAME,".RData"))

# Create sub-folder
  if (!file.exists(paste0("plots/differential_abundance/",SUBNAME))) dir.create(file.path(getwd(), paste0("plots/differential_abundance/",SUBNAME)))

# Filter samples (if needed)
# phy.fa <- subset_samples(phy.fa, material == MTRL)
if (exists("FEED") == TRUE) {
  phy.fa <- subset_samples(phy.fa, feed == FEED)
  print(paste0("Data contains type ",FEED," feed."))
} else {
  print("Data contains both feed LF and HF.")
}
if (exists("DAY") == TRUE) {
  phy.fa <- subset_samples(phy.fa, day %in% DAY)
  print(paste0("Data contains samples from day ",DAY,"."))
} else {
    print("Data contains all days.")
}
if (exists("TREAT") == TRUE) {
  phy.fa <- subset_samples(phy.fa, treatment == TREAT)
  print(paste0("Data contains only ",TREAT," treatment groups."))
} else {
  print("Data contains all treatment groups.")
}

# Filter data
filt <- preDA(data = phy.fa, min.reads = 20, min.samples = 4)

# Run the selected analysis
filt.DA <- DA.ttc(filt, predictor = PDI)

# Evaluate the plot and summary table
table(filt.DA$pval.adj < 0.05)
filt.p5 <- filt.DA[filt.DA$pval.adj < 0.05,]
write.csv(filt.p5,paste0("plots/differential_abundance/",SUBNAME,"/stats_",MTRL,"_",DAY,"_",FEED,"_",PDI,"_",LVL,".csv"))

if (any(filt.p5$pval.adj > 0)) {
  # Create a subset of the samples
  filt.ra <- transform_sample_counts(filt, function(x) x/sum(x)*100)
  DA.sig <- prune_taxa(filt.DA$Feature[filt.DA$pval.adj < 0.05], x = filt.ra)
  
  # melt the data
  DAm <- psmelt(DA.sig)
  
  # Create plot
  pseudocount <- min(DAm$Abundance[DAm$Abundance != 0])
  p.dif <- ggplot(DAm, aes(x = Family, y = Abundance+pseudocount, fill = fct_rev(feed_treat))) + geom_boxplot() + scale_y_log10() + coord_flip() + scale_fill_manual(values = params$COL1, name = "Treatment", labels = c("Control","PFOS"), breaks = c("LF_CTRL","LF_PFOS")) + facet_grid(Phylum ~ . , scales = "free_y", space = "free") + theme_bw()
  p.dT <- p.dif + ggtitle(paste0("Differential abundance on ",LVL," level for ",MTRL," ",DAY," ",FEED))
  print(p.dT)
  
  suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",FEED,"_",PDI,"_",LVL,".pdf"), p.dT, device = "pdf", units = "mm", width = 350, height = 200, dpi = 300))
  suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",FEED,"_",PDI,"_",LVL,".png"), p.dT, device = "png", units = "mm", width = 350, height = 200, dpi = 300))
} else {
  if (exists("FEED") == TRUE) {
  message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for feed ",FEED,"."))
  } else if (exists("TREAT") == TRUE) {
  message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for treatment ",TREAT,"."))
  }
}

# clear the environment and release memory
rm(list = ls(all.names = TRUE))
invisible(gc())

4.1.1.4 CONCLUSIONS

Significant differential abundances were observed between diets but not between treatment groups within each feed group on Day 0.

4.1.2 DAY 8

Samples at day 8 are taken 24 hours after last gavage of corn oil (+/- PFOS) and prior to dissection of half the animals.

4.1.2.1 Treatment in LF

Here we take a look at the differential abundance driven by PFOS treatment (CTRL vs. PFOS) in LF.

4.1.2.1.1 SPECIES (NO SIGNIFICANCE)
params <- readRDS("R_objects/comp_params.RDS")

LVL <- "Species" #c("Species","Genus","Family")
MTRL <- "Feces" #c("Feces","Ileum","Cecum")
DAY <- "d08" #c("d0","d08","d12","d16","d20","d21")
FEED <- "LF" #c("LF","HF")
#TREAT <- "CTRL" #c("CTRL","PFOS")
PDI <- "feed_treat" #c("treatment","feed")
SUBNAME <- "Feces_d08" #c("Feces","Feces_LF","Feces_HF","Feces_CTRL","Feces_PFOS","Feces_LF_CTRL","Feces_LF_PFOS","Feces_HF_CTRL","Feces_HF_PFOS",             "Feces_d0","Feces_d08","Feces_d12","Feces_d16","Feces_d20","Feces_d21","Feces_d0d08","Feces_d08d21",             "Ileum","Ileum_LF","Ileum_HF","Cecum","Cecum_LF","Cecum_HF")
load(paste0("R_objects/Agglomerated_",SUBNAME,".RData"))

# Create sub-folder
  if (!file.exists(paste0("plots/differential_abundance/",SUBNAME))) dir.create(file.path(getwd(), paste0("plots/differential_abundance/",SUBNAME)))

# Filter samples (if needed)
if (exists("FEED") == TRUE) {
  phy.sp <- subset_samples(phy.sp, feed == FEED)
  print(paste0("Data contains type ",FEED," feed."))
} else {
  print("Data contains both feed LF and HF.")
}
if (exists("DAY") == TRUE) {
  phy.sp <- subset_samples(phy.sp, day %in% DAY)
  print(paste0("Data contains samples from day ",DAY,"."))
} else {
    print("Data contains all days.")
}
if (exists("TREAT") == TRUE) {
  phy.sp <- subset_samples(phy.sp, treatment == TREAT)
  print(paste0("Data contains only ",TREAT," treatment groups."))
} else {
  print("Data contains all treatment groups.")
}

# Filter data
filt <- preDA(data = phy.sp, min.reads = 20, min.samples = 4)

# Test best method 
filt.test <- testDA(filt, predictor = PDI, effectSize = 10)

# Evaluate the plot and summary table
sum.fil <- summary(filt.test)
write.csv(sum.fil, file = paste0("plots/differential_abundance/",SUBNAME,"/filter_summary_",MTRL,"_",DAY,"_",FEED,"_",PDI,"_",LVL,".csv"))
p.fil <- plot(filt.test)
p.fil
pdf(paste0("plots/differential_abundance/",SUBNAME,"/filter_plot_",MTRL,"_",DAY,"_",FEED,"_",PDI,"_",LVL,".pdf"))
    p.fil
    dev.off()

Result: TTA (t-test)

RUN DAtest

Having now identified t-test - ALR (tta) to analyse the two groups as the optimal test we will use it to calculate differential abundance. This test is also used for remaining analyses in this section.

# Run the selected analysis
filt.DA <- DA.tta(filt, predictor = PDI)

# Evaluate the plot and summary table
table(filt.DA$pval.adj < 0.05)
filt.p5 <- filt.DA[filt.DA$pval.adj < 0.05,]
write.csv(filt.p5,paste0("plots/differential_abundance/",SUBNAME,"/stats_",MTRL,"_",DAY,"_",FEED,"_",PDI,"_",LVL,".csv"))

if (any(filt.p5$pval.adj > 0)) {
  # Create a subset of the samples
  filt.ra <- transform_sample_counts(filt, function(x) x/sum(x)*100)
  DA.sig <- prune_taxa(filt.DA$Feature[filt.DA$pval.adj < 0.05], x = filt.ra)
  
  # melt the data
  DAm <- psmelt(DA.sig)
  
  # Create plot
  pseudocount <- min(DAm$Abundance[DAm$Abundance != 0])
  p.dif <- ggplot(DAm, aes(x = Species, y = Abundance+pseudocount, fill = fct_rev(feed_treat))) + geom_boxplot() + scale_y_log10() + coord_flip() + scale_fill_manual(values = params$COL1, name = "Treatment", labels = c("Control","PFOS"), breaks = c("LF_CTRL","LF_PFOS")) + facet_grid(Phylum ~ . , scales = "free_y", space = "free") + theme_bw()
  p.dT <- p.dif + ggtitle(paste0("Differential abundance on ",LVL," level for ",MTRL," ",DAY," ",FEED))
  print(p.dT)
  
  suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",FEED,"_",PDI,"_",LVL,".pdf"), p.dT, device = "pdf", units = "mm", width = 350, height = 200, dpi = 300))
  suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",FEED,"_",PDI,"_",LVL,".png"), p.dT, device = "png", units = "mm", width = 350, height = 200, dpi = 300))
} else {
  if (exists("FEED") == TRUE) {
  message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for feed ",FEED,"."))
  } else if (exists("TREAT") == TRUE) {
  message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for treatment ",TREAT,"."))
  }
}

# clear the environment and release memory
rm(list = ls(all.names = TRUE))
invisible(gc())
4.1.2.1.2 GENUS (NO SIGNIFICANCE)
params <- readRDS("R_objects/comp_params.RDS")

LVL <- "Genus" #c("Species","Genus","Family")
MTRL <- "Feces" #c("Feces","Ileum","Cecum")
DAY <- "d08" #c("d0","d08","d12","d16","d20","d21")
FEED <- "LF" #c("LF","HF")
#TREAT <- "CTRL" #c("CTRL","PFOS")
PDI <- "feed_treat" #c("treatment","feed")
SUBNAME <- "Feces_d08" #c("Feces","Feces_LF","Feces_HF","Feces_CTRL","Feces_PFOS","Feces_LF_CTRL","Feces_LF_PFOS","Feces_HF_CTRL","Feces_HF_PFOS",             "Feces_d0","Feces_d08","Feces_d12","Feces_d16","Feces_d20","Feces_d21","Feces_d0d08","Feces_d08d21",             "Ileum","Ileum_LF","Ileum_HF","Cecum","Cecum_LF","Cecum_HF")
load(paste0("R_objects/Agglomerated_",SUBNAME,".RData"))

# Create sub-folder
  if (!file.exists(paste0("plots/differential_abundance/",SUBNAME))) dir.create(file.path(getwd(), paste0("plots/differential_abundance/",SUBNAME)))

# Filter samples (if needed)
# phy.ge <- subset_samples(phy.ge, material == MTRL)
if (exists("FEED") == TRUE) {
  phy.ge <- subset_samples(phy.ge, feed == FEED)
  print(paste0("Data contains type ",FEED," feed."))
} else {
  print("Data contains both feed LF and HF.")
}
if (exists("DAY") == TRUE) {
  phy.ge <- subset_samples(phy.ge, day %in% DAY)
  print(paste0("Data contains samples from day ",DAY,"."))
} else {
    print("Data contains all days.")
}
if (exists("TREAT") == TRUE) {
  phy.ge <- subset_samples(phy.ge, treatment == TREAT)
  print(paste0("Data contains only ",TREAT," treatment groups."))
} else {
  print("Data contains all treatment groups.")
}

# Filter data
filt <- preDA(data = phy.ge, min.reads = 20, min.samples = 4)

# Run the selected analysis
filt.DA <- DA.tta(filt, predictor = PDI)

# Evaluate the plot and summary table
table(filt.DA$pval.adj < 0.05)
filt.p5 <- filt.DA[filt.DA$pval.adj < 0.05,]
write.csv(filt.p5,paste0("plots/differential_abundance/",SUBNAME,"/stats_",MTRL,"_",DAY,"_",FEED,"_",PDI,"_",LVL,".csv"))

if (any(filt.p5$pval.adj > 0)) {
  # Create a subset of the samples
  filt.ra <- transform_sample_counts(filt, function(x) x/sum(x)*100)
  DA.sig <- prune_taxa(filt.DA$Feature[filt.DA$pval.adj < 0.05], x = filt.ra)
  
  # melt the data
  DAm <- psmelt(DA.sig)
  
  # Create plot
  pseudocount <- min(DAm$Abundance[DAm$Abundance != 0])
  p.dif <- ggplot(DAm, aes(x = Genus, y = Abundance+pseudocount, fill = fct_rev(feed_treat))) + geom_boxplot() + scale_y_log10() + coord_flip() + scale_fill_manual(values = params$COL1, name = "Treatment", labels = c("Control","PFOS"), breaks = c("LF_CTRL","LF_PFOS")) + facet_grid(Phylum ~ . , scales = "free_y", space = "free") + theme_bw()
  p.dT <- p.dif + ggtitle(paste0("Differential abundance on ",LVL," level for ",MTRL," ",DAY," ",FEED))
  print(p.dT)
  
  suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",FEED,"_",PDI,"_",LVL,".pdf"), p.dT, device = "pdf", units = "mm", width = 350, height = 200, dpi = 300))
  suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",FEED,"_",PDI,"_",LVL,".png"), p.dT, device = "png", units = "mm", width = 350, height = 200, dpi = 300))
} else {
  if (exists("FEED") == TRUE) {
  message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for feed ",FEED,"."))
  } else if (exists("TREAT") == TRUE) {
  message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for treatment ",TREAT,"."))
  }
}

# clear the environment and release memory
rm(list = ls(all.names = TRUE))
invisible(gc())
4.1.2.1.3 FAMILY (NO SIGNIFICANCE)
params <- readRDS("R_objects/comp_params.RDS")

LVL <- "Family" #c("Species","Genus","Family")
MTRL <- "Feces" #c("Feces","Ileum","Cecum")
DAY <- "d08" #c("d0","d08","d12","d16","d20","d21")
FEED <- "LF" #c("LF","HF")
#TREAT <- "CTRL" #c("CTRL","PFOS")
PDI <- "feed_treat" #c("treatment","feed")
SUBNAME <- "Feces_d08" #c("Feces","Feces_LF","Feces_HF","Feces_CTRL","Feces_PFOS","Feces_LF_CTRL","Feces_LF_PFOS","Feces_HF_CTRL","Feces_HF_PFOS",             "Feces_d0","Feces_d08","Feces_d12","Feces_d16","Feces_d20","Feces_d21","Feces_d0d08","Feces_d08d21",             "Ileum","Ileum_LF","Ileum_HF","Cecum","Cecum_LF","Cecum_HF")
load(paste0("R_objects/Agglomerated_",SUBNAME,".RData"))

# Create sub-folder
  if (!file.exists(paste0("plots/differential_abundance/",SUBNAME))) dir.create(file.path(getwd(), paste0("plots/differential_abundance/",SUBNAME)))

# Filter samples (if needed)
# phy.fa <- subset_samples(phy.fa, material == MTRL)
if (exists("FEED") == TRUE) {
  phy.fa <- subset_samples(phy.fa, feed == FEED)
  print(paste0("Data contains type ",FEED," feed."))
} else {
  print("Data contains both feed LF and HF.")
}
if (exists("DAY") == TRUE) {
  phy.fa <- subset_samples(phy.fa, day %in% DAY)
  print(paste0("Data contains samples from day ",DAY,"."))
} else {
    print("Data contains all days.")
}
if (exists("TREAT") == TRUE) {
  phy.fa <- subset_samples(phy.fa, treatment == TREAT)
  print(paste0("Data contains only ",TREAT," treatment groups."))
} else {
  print("Data contains all treatment groups.")
}

# Filter data
filt <- preDA(data = phy.fa, min.reads = 20, min.samples = 4)

# Run the selected analysis
filt.DA <- DA.tta(filt, predictor = PDI)

# Evaluate the plot and summary table
table(filt.DA$pval.adj < 0.05)
filt.p5 <- filt.DA[filt.DA$pval.adj < 0.05,]
write.csv(filt.p5,paste0("plots/differential_abundance/",SUBNAME,"/stats_",MTRL,"_",DAY,"_",FEED,"_",PDI,"_",LVL,".csv"))

if (any(filt.p5$pval.adj > 0)) {
  # Create a subset of the samples
  filt.ra <- transform_sample_counts(filt, function(x) x/sum(x)*100)
  DA.sig <- prune_taxa(filt.DA$Feature[filt.DA$pval.adj < 0.05], x = filt.ra)
  
  # melt the data
  DAm <- psmelt(DA.sig)
  
  # Create plot
  pseudocount <- min(DAm$Abundance[DAm$Abundance != 0])
  p.dif <- ggplot(DAm, aes(x = Family, y = Abundance+pseudocount, fill = fct_rev(feed_treat))) + geom_boxplot() + scale_y_log10() + coord_flip() + scale_fill_manual(values = params$COL1, name = "Treatment", labels = c("Control","PFOS"), breaks = c("LF_CTRL","LF_PFOS")) + facet_grid(Phylum ~ . , scales = "free_y", space = "free") + theme_bw()
  p.dT <- p.dif + ggtitle(paste0("Differential abundance on ",LVL," level for ",MTRL," ",DAY," ",FEED))
  print(p.dT)
  
  suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",FEED,"_",PDI,"_",LVL,".pdf"), p.dT, device = "pdf", units = "mm", width = 350, height = 200, dpi = 300))
  suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",FEED,"_",PDI,"_",LVL,".png"), p.dT, device = "png", units = "mm", width = 350, height = 200, dpi = 300))
} else {
  if (exists("FEED") == TRUE) {
  message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for feed ",FEED,"."))
  } else if (exists("TREAT") == TRUE) {
  message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for treatment ",TREAT,"."))
  }
}

# clear the environment and release memory
rm(list = ls(all.names = TRUE))
invisible(gc())

4.1.2.2 Treatment in HF

Here we take a look at the differential abundance driven by PFOS treatment (CTRL vs. PFOS) in HF.

4.1.2.2.1 SPECIES (NO SIGNIFICANCE)
params <- readRDS("R_objects/comp_params.RDS")

LVL <- "Species" #c("Species","Genus","Family")
MTRL <- "Feces" #c("Feces","Ileum","Cecum")
DAY <- "d08" #c("d0","d08","d12","d16","d20","d21")
FEED <- "HF" #c("LF","HF")
#TREAT <- "CTRL" #c("CTRL","PFOS")
PDI <- "feed_treat" #c("treatment","feed")
SUBNAME <- "Feces_d08" #c("Feces","Feces_LF","Feces_HF","Feces_CTRL","Feces_PFOS","Feces_LF_CTRL","Feces_LF_PFOS","Feces_HF_CTRL","Feces_HF_PFOS",             "Feces_d0","Feces_d08","Feces_d12","Feces_d16","Feces_d20","Feces_d21","Feces_d0d08","Feces_d08d21",             "Ileum","Ileum_LF","Ileum_HF","Cecum","Cecum_LF","Cecum_HF")
load(paste0("R_objects/Agglomerated_",SUBNAME,".RData"))

# Create sub-folder
  if (!file.exists(paste0("plots/differential_abundance/",SUBNAME))) dir.create(file.path(getwd(), paste0("plots/differential_abundance/",SUBNAME)))

# Filter samples (if needed)
if (exists("FEED") == TRUE) {
  phy.sp <- subset_samples(phy.sp, feed == FEED)
  print(paste0("Data contains type ",FEED," feed."))
} else {
  print("Data contains both feed LF and HF.")
}
if (exists("DAY") == TRUE) {
  phy.sp <- subset_samples(phy.sp, day %in% DAY)
  print(paste0("Data contains samples from day ",DAY,"."))
} else {
    print("Data contains all days.")
}
if (exists("TREAT") == TRUE) {
  phy.sp <- subset_samples(phy.sp, treatment == TREAT)
  print(paste0("Data contains only ",TREAT," treatment groups."))
} else {
  print("Data contains all treatment groups.")
}

# Filter data
filt <- preDA(data = phy.sp, min.reads = 20, min.samples = 4)

# Run the selected analysis
filt.DA <- DA.ttc(filt, predictor = PDI)

# Evaluate the plot and summary table
table(filt.DA$pval.adj < 0.05)
filt.p5 <- filt.DA[filt.DA$pval.adj < 0.05,]
write.csv(filt.p5,paste0("plots/differential_abundance/",SUBNAME,"/stats_",MTRL,"_",DAY,"_",FEED,"_",PDI,"_",LVL,".csv"))

if (any(filt.p5$pval.adj > 0)) {
  # Create a subset of the samples
  filt.ra <- transform_sample_counts(filt, function(x) x/sum(x)*100)
  DA.sig <- prune_taxa(filt.DA$Feature[filt.DA$pval.adj < 0.05], x = filt.ra)
  
  # melt the data
  DAm <- psmelt(DA.sig)
  
  # Create plot
  pseudocount <- min(DAm$Abundance[DAm$Abundance != 0])
  p.dif <- ggplot(DAm, aes(x = Species, y = Abundance+pseudocount, fill = fct_rev(feed_treat))) + geom_boxplot() + scale_y_log10() + coord_flip() + scale_fill_manual(values = params$COL1, name = "Treatment", labels = c("Control","PFOS"), breaks = c("LF_CTRL","LF_PFOS")) + facet_grid(Phylum ~ . , scales = "free_y", space = "free") + theme_bw()
  p.dT <- p.dif + ggtitle(paste0("Differential abundance on ",LVL," level for ",MTRL," ",DAY," ",FEED))
  print(p.dT)
  
  suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",FEED,"_",PDI,"_",LVL,".pdf"), p.dT, device = "pdf", units = "mm", width = 350, height = 200, dpi = 300))
  suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",FEED,"_",PDI,"_",LVL,".png"), p.dT, device = "png", units = "mm", width = 350, height = 200, dpi = 300))
} else {
  if (exists("FEED") == TRUE) {
  message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for feed ",FEED,"."))
  } else if (exists("TREAT") == TRUE) {
  message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for treatment ",TREAT,"."))
  }
}
# clear the environment and release memory
rm(list = ls(all.names = TRUE))
invisible(gc())
4.1.2.2.2 GENUS (NO SIGNIFICANCE)
params <- readRDS("R_objects/comp_params.RDS")

LVL <- "Genus" #c("Species","Genus","Family")
MTRL <- "Feces" #c("Feces","Ileum","Cecum")
DAY <- "d08" #c("d0","d08","d12","d16","d20","d21")
FEED <- "HF" #c("LF","HF")
#TREAT <- "CTRL" #c("CTRL","PFOS")
PDI <- "feed_treat" #c("treatment","feed")
SUBNAME <- "Feces_d08" #c("Feces","Feces_LF","Feces_HF","Feces_CTRL","Feces_PFOS","Feces_LF_CTRL","Feces_LF_PFOS","Feces_HF_CTRL","Feces_HF_PFOS",             "Feces_d0","Feces_d08","Feces_d12","Feces_d16","Feces_d20","Feces_d21","Feces_d0d08","Feces_d08d21",             "Ileum","Ileum_LF","Ileum_HF","Cecum","Cecum_LF","Cecum_HF")
load(paste0("R_objects/Agglomerated_",SUBNAME,".RData"))

# Create sub-folder
  if (!file.exists(paste0("plots/differential_abundance/",SUBNAME))) dir.create(file.path(getwd(), paste0("plots/differential_abundance/",SUBNAME)))

# Filter samples (if needed)
# phy.ge <- subset_samples(phy.ge, material == MTRL)
if (exists("FEED") == TRUE) {
  phy.ge <- subset_samples(phy.ge, feed == FEED)
  print(paste0("Data contains type ",FEED," feed."))
} else {
  print("Data contains both feed LF and HF.")
}
if (exists("DAY") == TRUE) {
  phy.ge <- subset_samples(phy.ge, day %in% DAY)
  print(paste0("Data contains samples from day ",DAY,"."))
} else {
    print("Data contains all days.")
}
if (exists("TREAT") == TRUE) {
  phy.ge <- subset_samples(phy.ge, treatment == TREAT)
  print(paste0("Data contains only ",TREAT," treatment groups."))
} else {
  print("Data contains all treatment groups.")
}

# Filter data
filt <- preDA(data = phy.ge, min.reads = 20, min.samples = 4)

# Run the selected analysis
filt.DA <- DA.ttc(filt, predictor = PDI)

# Evaluate the plot and summary table
table(filt.DA$pval.adj < 0.05)
filt.p5 <- filt.DA[filt.DA$pval.adj < 0.05,]
write.csv(filt.p5,paste0("plots/differential_abundance/",SUBNAME,"/stats_",MTRL,"_",DAY,"_",FEED,"_",PDI,"_",LVL,".csv"))

if (any(filt.p5$pval.adj > 0)) {
  # Create a subset of the samples
  filt.ra <- transform_sample_counts(filt, function(x) x/sum(x)*100)
  DA.sig <- prune_taxa(filt.DA$Feature[filt.DA$pval.adj < 0.05], x = filt.ra)
  
  # melt the data
  DAm <- psmelt(DA.sig)
  
  # Create plot
  pseudocount <- min(DAm$Abundance[DAm$Abundance != 0])
  p.dif <- ggplot(DAm, aes(x = Genus, y = Abundance+pseudocount, fill = fct_rev(feed_treat))) + geom_boxplot() + scale_y_log10() + coord_flip() + scale_fill_manual(values = params$COL1, name = "Treatment", labels = c("Control","PFOS"), breaks = c("LF_CTRL","LF_PFOS")) + facet_grid(Phylum ~ . , scales = "free_y", space = "free") + theme_bw()
  p.dT <- p.dif + ggtitle(paste0("Differential abundance on ",LVL," level for ",MTRL," ",DAY," ",FEED))
  print(p.dT)
  
  suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",FEED,"_",PDI,"_",LVL,".pdf"), p.dT, device = "pdf", units = "mm", width = 350, height = 200, dpi = 300))
  suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",FEED,"_",PDI,"_",LVL,".png"), p.dT, device = "png", units = "mm", width = 350, height = 200, dpi = 300))
} else {
  if (exists("FEED") == TRUE) {
  message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for feed ",FEED,"."))
  } else if (exists("TREAT") == TRUE) {
  message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for treatment ",TREAT,"."))
  }
}

# clear the environment and release memory
rm(list = ls(all.names = TRUE))
invisible(gc())
4.1.2.2.3 FAMILY (NO SIGNIFICANCE)
params <- readRDS("R_objects/comp_params.RDS")

LVL <- "Family" #c("Species","Genus","Family")
MTRL <- "Feces" #c("Feces","Ileum","Cecum")
DAY <- "d08" #c("d0","d08","d12","d16","d20","d21")
FEED <- "HF" #c("LF","HF")
#TREAT <- "CTRL" #c("CTRL","PFOS")
PDI <- "feed_treat" #c("treatment","feed")
SUBNAME <- "Feces_d08" #c("Feces","Feces_LF","Feces_HF","Feces_CTRL","Feces_PFOS","Feces_LF_CTRL","Feces_LF_PFOS","Feces_HF_CTRL","Feces_HF_PFOS",             "Feces_d0","Feces_d08","Feces_d12","Feces_d16","Feces_d20","Feces_d21","Feces_d0d08","Feces_d08d21",             "Ileum","Ileum_LF","Ileum_HF","Cecum","Cecum_LF","Cecum_HF")
load(paste0("R_objects/Agglomerated_",SUBNAME,".RData"))

# Create sub-folder
  if (!file.exists(paste0("plots/differential_abundance/",SUBNAME))) dir.create(file.path(getwd(), paste0("plots/differential_abundance/",SUBNAME)))

# Filter samples (if needed)
# phy.fa <- subset_samples(phy.fa, material == MTRL)
if (exists("FEED") == TRUE) {
  phy.fa <- subset_samples(phy.fa, feed == FEED)
  print(paste0("Data contains type ",FEED," feed."))
} else {
  print("Data contains both feed LF and HF.")
}
if (exists("DAY") == TRUE) {
  phy.fa <- subset_samples(phy.fa, day %in% DAY)
  print(paste0("Data contains samples from day ",DAY,"."))
} else {
    print("Data contains all days.")
}
if (exists("TREAT") == TRUE) {
  phy.fa <- subset_samples(phy.fa, treatment == TREAT)
  print(paste0("Data contains only ",TREAT," treatment groups."))
} else {
  print("Data contains all treatment groups.")
}

# Filter data
filt <- preDA(data = phy.fa, min.reads = 20, min.samples = 4)

# Run the selected analysis
filt.DA <- DA.ttc(filt, predictor = PDI)

# Evaluate the plot and summary table
table(filt.DA$pval.adj < 0.05)
filt.p5 <- filt.DA[filt.DA$pval.adj < 0.05,]
write.csv(filt.p5,paste0("plots/differential_abundance/",SUBNAME,"/stats_",MTRL,"_",DAY,"_",FEED,"_",PDI,"_",LVL,".csv"))

if (any(filt.p5$pval.adj > 0)) {
  # Create a subset of the samples
  filt.ra <- transform_sample_counts(filt, function(x) x/sum(x)*100)
  DA.sig <- prune_taxa(filt.DA$Feature[filt.DA$pval.adj < 0.05], x = filt.ra)
  
  # melt the data
  DAm <- psmelt(DA.sig)
  
  # Create plot
  pseudocount <- min(DAm$Abundance[DAm$Abundance != 0])
  p.dif <- ggplot(DAm, aes(x = Family, y = Abundance+pseudocount, fill = fct_rev(feed_treat))) + geom_boxplot() + scale_y_log10() + coord_flip() + scale_fill_manual(values = params$COL1, name = "Treatment", labels = c("Control","PFOS"), breaks = c("LF_CTRL","LF_PFOS")) + facet_grid(Phylum ~ . , scales = "free_y", space = "free") + theme_bw()
  p.dT <- p.dif + ggtitle(paste0("Differential abundance on ",LVL," level for ",MTRL," ",DAY," ",FEED))
  print(p.dT)
  
  suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",FEED,"_",PDI,"_",LVL,".pdf"), p.dT, device = "pdf", units = "mm", width = 350, height = 200, dpi = 300))
  suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",FEED,"_",PDI,"_",LVL,".png"), p.dT, device = "png", units = "mm", width = 350, height = 200, dpi = 300))
} else {
  if (exists("FEED") == TRUE) {
  message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for feed ",FEED,"."))
  } else if (exists("TREAT") == TRUE) {
  message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for treatment ",TREAT,"."))
  }
}

# clear the environment and release memory
rm(list = ls(all.names = TRUE))
invisible(gc())

4.1.2.3 Feed in CTRL

Here we investigate the differential abundance driven by feed (No fibre vs. Fibre) in the control treatment group (CTRL).

4.1.2.3.1 SPECIES
params <- readRDS("R_objects/comp_params.RDS")

LVL <- "Species" #c("Species","Genus","Family")
MTRL <- "Feces" #c("Feces","Ileum","Cecum")
DAY <- "d08" #c("d0","d08","d12","d16","d20","d21")
#FEED <- "LF" #c("LF","HF")
TREAT <- "CTRL" #c("CTRL","PFOS")
PDI <- "feed_treat" #c("treatment","feed")
SUBNAME <- "Feces_d08" #c("Feces","Feces_LF","Feces_HF","Feces_CTRL","Feces_PFOS","Feces_LF_CTRL","Feces_LF_PFOS","Feces_HF_CTRL","Feces_HF_PFOS",             "Feces_d0","Feces_d08","Feces_d12","Feces_d16","Feces_d20","Feces_d21","Feces_d0d08","Feces_d08d21",             "Ileum","Ileum_LF","Ileum_HF","Cecum","Cecum_LF","Cecum_HF")
load(paste0("R_objects/Agglomerated_",SUBNAME,".RData"))

# Create sub-folder
  if (!file.exists(paste0("plots/differential_abundance/",SUBNAME))) dir.create(file.path(getwd(), paste0("plots/differential_abundance/",SUBNAME)))

# Filter samples (if needed)
if (exists("FEED") == TRUE) {
  phy.sp <- subset_samples(phy.sp, feed == FEED)
  print(paste0("Data contains type ",FEED," feed."))
} else {
  print("Data contains both feed LF and HF.")
}
if (exists("DAY") == TRUE) {
  phy.sp <- subset_samples(phy.sp, day %in% DAY)
  print(paste0("Data contains samples from day ",DAY,"."))
} else {
    print("Data contains all days.")
}
if (exists("TREAT") == TRUE) {
  phy.sp <- subset_samples(phy.sp, treatment == TREAT)
  print(paste0("Data contains only ",TREAT," treatment groups."))
} else {
  print("Data contains all treatment groups.")
}

# Filter data
filt <- preDA(data = phy.sp, min.reads = 20, min.samples = 4)

# Run the selected analysis
filt.DA <- DA.ttc(filt, predictor = PDI)

# Evaluate the plot and summary table
table(filt.DA$pval.adj < 0.05)
filt.p5 <- filt.DA[filt.DA$pval.adj < 0.05,]
write.csv(filt.p5,paste0("plots/differential_abundance/",SUBNAME,"/stats_",MTRL,"_",DAY,"_",TREAT,"_",PDI,"_",LVL,".csv"))

if (any(filt.p5$pval.adj > 0)) {
  # Create a subset of the samples
  filt.ra <- transform_sample_counts(filt, function(x) x/sum(x)*100)
  DA.sig <- prune_taxa(filt.DA$Feature[filt.DA$pval.adj < 0.05], x = filt.ra)
  
  # melt the data
  DAm <- psmelt(DA.sig)
  
  # Create plot
  pseudocount <- min(DAm$Abundance[DAm$Abundance != 0])
  p.dif <- ggplot(DAm, aes(x = Species, y = Abundance+pseudocount, fill = fct_rev(feed_treat))) + geom_boxplot() + scale_y_log10() + coord_flip() + scale_fill_manual(values = params$COL1, name = "Feed", labels = c("HF","LF"), breaks = c("LF_CTRL","HF_CTRL")) + facet_grid(Phylum ~ . , scales = "free_y", space = "free") + theme_bw()
  p.dT <- p.dif + ggtitle(paste0("Differential abundance on ",LVL," level for ",MTRL," ",DAY," ",TREAT))
  print(p.dT)
  
  suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",TREAT,"_",PDI,"_",LVL,".pdf"), p.dT, device = "pdf", units = "mm", width = 350, height = 200, dpi = 300))
  suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",TREAT,"_",PDI,"_",LVL,".png"), p.dT, device = "png", units = "mm", width = 350, height = 200, dpi = 300))
} else {
  if (exists("FEED") == TRUE) {
  message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for feed ",FEED,"."))
  } else if (exists("TREAT") == TRUE) {
  message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for treatment ",TREAT,"."))
  }
}

# clear the environment and release memory
rm(list = ls(all.names = TRUE))
invisible(gc())
Figure
Figure
4.1.2.3.2 GENUS
params <- readRDS("R_objects/comp_params.RDS")

LVL <- "Genus" #c("Species","Genus","Family")
MTRL <- "Feces" #c("Feces","Ileum","Cecum")
DAY <- "d08" #c("d0","d08","d12","d16","d20","d21")
#FEED <- "LF" #c("LF","HF")
TREAT <- "CTRL" #c("CTRL","PFOS")
PDI <- "feed_treat" #c("treatment","feed")
SUBNAME <- "Feces_d08" #c("Feces","Feces_LF","Feces_HF","Feces_CTRL","Feces_PFOS","Feces_LF_CTRL","Feces_LF_PFOS","Feces_HF_CTRL","Feces_HF_PFOS",             "Feces_d0","Feces_d08","Feces_d12","Feces_d16","Feces_d20","Feces_d21","Feces_d0d08","Feces_d08d21",             "Ileum","Ileum_LF","Ileum_HF","Cecum","Cecum_LF","Cecum_HF")
load(paste0("R_objects/Agglomerated_",SUBNAME,".RData"))

# Create sub-folder
  if (!file.exists(paste0("plots/differential_abundance/",SUBNAME))) dir.create(file.path(getwd(), paste0("plots/differential_abundance/",SUBNAME)))

# Filter samples (if needed)
# phy.ge <- subset_samples(phy.ge, material == MTRL)
if (exists("FEED") == TRUE) {
  phy.ge <- subset_samples(phy.ge, feed == FEED)
  print(paste0("Data contains type ",FEED," feed."))
} else {
  print("Data contains both feed LF and HF.")
}
if (exists("DAY") == TRUE) {
  phy.ge <- subset_samples(phy.ge, day %in% DAY)
  print(paste0("Data contains samples from day ",DAY,"."))
} else {
    print("Data contains all days.")
}
if (exists("TREAT") == TRUE) {
  phy.ge <- subset_samples(phy.ge, treatment == TREAT)
  print(paste0("Data contains only ",TREAT," treatment groups."))
} else {
  print("Data contains all treatment groups.")
}

# Filter data
filt <- preDA(data = phy.ge, min.reads = 20, min.samples = 4)

# Run the selected analysis
filt.DA <- DA.ltt(filt, predictor = PDI)

# Evaluate the plot and summary table
table(filt.DA$pval.adj < 0.05)
filt.p5 <- filt.DA[filt.DA$pval.adj < 0.05,]
write.csv(filt.p5,paste0("plots/differential_abundance/",SUBNAME,"/stats_",MTRL,"_",DAY,"_",TREAT,"_",PDI,"_",LVL,".csv"))

if (any(filt.p5$pval.adj > 0)) {
  # Create a subset of the samples
  filt.ra <- transform_sample_counts(filt, function(x) x/sum(x)*100)
  DA.sig <- prune_taxa(filt.DA$Feature[filt.DA$pval.adj < 0.05], x = filt.ra)
  
  # melt the data
  DAm <- psmelt(DA.sig)
  
  # Create plot
  pseudocount <- min(DAm$Abundance[DAm$Abundance != 0])
  p.dif <- ggplot(DAm, aes(x = Genus, y = Abundance+pseudocount, fill = fct_rev(feed_treat))) + geom_boxplot() + scale_y_log10() + coord_flip() + scale_fill_manual(values = params$COL1, name = "Feed", labels = c("HF","LF"), breaks = c("LF_CTRL","HF_CTRL")) + facet_grid(Phylum ~ . , scales = "free_y", space = "free") + theme_bw()
  p.dT <- p.dif + ggtitle(paste0("Differential abundance on ",LVL," level for ",MTRL," ",DAY," ",TREAT))
  print(p.dT)
  
  suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",TREAT,"_",PDI,"_",LVL,".pdf"), p.dT, device = "pdf", units = "mm", width = 350, height = 200, dpi = 300))
  suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",TREAT,"_",PDI,"_",LVL,".png"), p.dT, device = "png", units = "mm", width = 350, height = 200, dpi = 300))
} else {
  if (exists("FEED") == TRUE) {
  message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for feed ",FEED,"."))
  } else if (exists("TREAT") == TRUE) {
  message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for treatment ",TREAT,"."))
  }
}

# clear the environment and release memory
rm(list = ls(all.names = TRUE))
invisible(gc())
Figure
Figure
4.1.2.3.3 FAMILY
params <- readRDS("R_objects/comp_params.RDS")

LVL <- "Family" #c("Species","Genus","Family")
MTRL <- "Feces" #c("Feces","Ileum","Cecum")
DAY <- "d08" #c("d0","d08","d12","d16","d20","d21")
#FEED <- "LF" #c("LF","HF")
TREAT <- "CTRL" #c("CTRL","PFOS")
PDI <- "feed_treat" #c("treatment","feed")
SUBNAME <- "Feces_d08" #c("Feces","Feces_LF","Feces_HF","Feces_CTRL","Feces_PFOS","Feces_LF_CTRL","Feces_LF_PFOS","Feces_HF_CTRL","Feces_HF_PFOS",             "Feces_d0","Feces_d08","Feces_d12","Feces_d16","Feces_d20","Feces_d21","Feces_d0d08","Feces_d08d21",             "Ileum","Ileum_LF","Ileum_HF","Cecum","Cecum_LF","Cecum_HF")
load(paste0("R_objects/Agglomerated_",SUBNAME,".RData"))

# Create sub-folder
  if (!file.exists(paste0("plots/differential_abundance/",SUBNAME))) dir.create(file.path(getwd(), paste0("plots/differential_abundance/",SUBNAME)))

# Filter samples (if needed)
# phy.fa <- subset_samples(phy.fa, material == MTRL)
if (exists("FEED") == TRUE) {
  phy.fa <- subset_samples(phy.fa, feed == FEED)
  print(paste0("Data contains type ",FEED," feed."))
} else {
  print("Data contains both feed LF and HF.")
}
if (exists("DAY") == TRUE) {
  phy.fa <- subset_samples(phy.fa, day %in% DAY)
  print(paste0("Data contains samples from day ",DAY,"."))
} else {
    print("Data contains all days.")
}
if (exists("TREAT") == TRUE) {
  phy.fa <- subset_samples(phy.fa, treatment == TREAT)
  print(paste0("Data contains only ",TREAT," treatment groups."))
} else {
  print("Data contains all treatment groups.")
}

# Filter data
filt <- preDA(data = phy.fa, min.reads = 20, min.samples = 4)

# Run the selected analysis
filt.DA <- DA.ttc(filt, predictor = PDI)

# Evaluate the plot and summary table
table(filt.DA$pval.adj < 0.05)
filt.p5 <- filt.DA[filt.DA$pval.adj < 0.05,]
write.csv(filt.p5,paste0("plots/differential_abundance/",SUBNAME,"/stats_",MTRL,"_",DAY,"_",TREAT,"_",PDI,"_",LVL,".csv"))

if (any(filt.p5$pval.adj > 0)) {
  # Create a subset of the samples
  filt.ra <- transform_sample_counts(filt, function(x) x/sum(x)*100)
  DA.sig <- prune_taxa(filt.DA$Feature[filt.DA$pval.adj < 0.05], x = filt.ra)
  
  # melt the data
  DAm <- psmelt(DA.sig)
  
  # Create plot
  pseudocount <- min(DAm$Abundance[DAm$Abundance != 0])
  p.dif <- ggplot(DAm, aes(x = Family, y = Abundance+pseudocount, fill = fct_rev(feed_treat))) + geom_boxplot() + scale_y_log10() + coord_flip() + scale_fill_manual(values = params$COL1, name = "Feed", labels = c("HF","LF"), breaks = c("LF_CTRL","HF_CTRL")) + facet_grid(Phylum ~ . , scales = "free_y", space = "free") + theme_bw()
  p.dT <- p.dif + ggtitle(paste0("Differential abundance on ",LVL," level for ",MTRL," ",DAY," ",TREAT))
  print(p.dT)
  
  suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",TREAT,"_",PDI,"_",LVL,".pdf"), p.dT, device = "pdf", units = "mm", width = 350, height = 200, dpi = 300))
  suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",TREAT,"_",PDI,"_",LVL,".png"), p.dT, device = "png", units = "mm", width = 350, height = 200, dpi = 300))
} else {
  if (exists("FEED") == TRUE) {
  message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for feed ",FEED,"."))
  } else if (exists("TREAT") == TRUE) {
  message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for treatment ",TREAT,"."))
  }
}

# clear the environment and release memory
rm(list = ls(all.names = TRUE))
invisible(gc())
Figure
Figure

4.1.2.4 Feed in PFOS

Here we investigate the differential abundance driven by feed (No fibre vs. Fibre) in the PFOS treatment group (PFOS).

4.1.2.4.1 SPECIES
params <- readRDS("R_objects/comp_params.RDS")

LVL <- "Species" #c("Species","Genus","Family")
MTRL <- "Feces" #c("Feces","Ileum","Cecum")
DAY <- "d08" #c("d0","d08","d12","d16","d20","d21")
#FEED <- "LF" #c("LF","HF")
TREAT <- "PFOS" #c("CTRL","PFOS")
PDI <- "feed_treat" #c("treatment","feed")
SUBNAME <- "Feces_d08" #c("Feces","Feces_LF","Feces_HF","Feces_CTRL","Feces_PFOS","Feces_LF_CTRL","Feces_LF_PFOS","Feces_HF_CTRL","Feces_HF_PFOS",             "Feces_d0","Feces_d08","Feces_d12","Feces_d16","Feces_d20","Feces_d21","Feces_d0d08","Feces_d08d21",             "Ileum","Ileum_LF","Ileum_HF","Cecum","Cecum_LF","Cecum_HF")
load(paste0("R_objects/Agglomerated_",SUBNAME,".RData"))

# Create sub-folder
  if (!file.exists(paste0("plots/differential_abundance/",SUBNAME))) dir.create(file.path(getwd(), paste0("plots/differential_abundance/",SUBNAME)))

# Filter samples (if needed)
if (exists("FEED") == TRUE) {
  phy.sp <- subset_samples(phy.sp, feed == FEED)
  print(paste0("Data contains type ",FEED," feed."))
} else {
  print("Data contains both feed LF and HF.")
}
if (exists("DAY") == TRUE) {
  phy.sp <- subset_samples(phy.sp, day %in% DAY)
  print(paste0("Data contains samples from day ",DAY,"."))
} else {
    print("Data contains all days.")
}
if (exists("TREAT") == TRUE) {
  phy.sp <- subset_samples(phy.sp, treatment == TREAT)
  print(paste0("Data contains only ",TREAT," treatment groups."))
} else {
  print("Data contains all treatment groups.")
}

# Filter data
filt <- preDA(data = phy.sp, min.reads = 20, min.samples = 4)

# Run the selected analysis
filt.DA <- DA.ttc(filt, predictor = PDI)

# Evaluate the plot and summary table
table(filt.DA$pval.adj < 0.05)
filt.p5 <- filt.DA[filt.DA$pval.adj < 0.05,]
write.csv(filt.p5,paste0("plots/differential_abundance/",SUBNAME,"/stats_",MTRL,"_",DAY,"_",TREAT,"_",PDI,"_",LVL,".csv"))

if (any(filt.p5$pval.adj > 0)) {
  # Create a subset of the samples
  filt.ra <- transform_sample_counts(filt, function(x) x/sum(x)*100)
  DA.sig <- prune_taxa(filt.DA$Feature[filt.DA$pval.adj < 0.05], x = filt.ra)
  
  # melt the data
  DAm <- psmelt(DA.sig)
  
  # Create plot
  pseudocount <- min(DAm$Abundance[DAm$Abundance != 0])
  p.dif <- ggplot(DAm, aes(x = Species, y = Abundance+pseudocount, fill = fct_rev(feed_treat))) + geom_boxplot() + scale_y_log10() + coord_flip() + scale_fill_manual(values = params$COL1, name = "Feed", labels = c("HF","LF"), breaks = c("LF_CTRL","HF_CTRL")) + facet_grid(Phylum ~ . , scales = "free_y", space = "free") + theme_bw()
  p.dT <- p.dif + ggtitle(paste0("Differential abundance on ",LVL," level for ",MTRL," ",DAY," ",TREAT))
  print(p.dT)
  
  suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",TREAT,"_",PDI,"_",LVL,".pdf"), p.dT, device = "pdf", units = "mm", width = 350, height = 200, dpi = 300))
  suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",TREAT,"_",PDI,"_",LVL,".png"), p.dT, device = "png", units = "mm", width = 350, height = 200, dpi = 300))
} else {
  if (exists("FEED") == TRUE) {
  message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for feed ",FEED,"."))
  } else if (exists("TREAT") == TRUE) {
  message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for treatment ",TREAT,"."))
  }
}

# clear the environment and release memory
rm(list = ls(all.names = TRUE))
invisible(gc())
Figure
Figure
4.1.2.4.2 GENUS
params <- readRDS("R_objects/comp_params.RDS")

LVL <- "Genus" #c("Species","Genus","Family")
MTRL <- "Feces" #c("Feces","Ileum","Cecum")
DAY <- "d08" #c("d0","d08","d12","d16","d20","d21")
#FEED <- "LF" #c("LF","HF")
TREAT <- "PFOS" #c("CTRL","PFOS")
PDI <- "feed_treat" #c("treatment","feed")
SUBNAME <- "Feces_d08" #c("Feces","Feces_LF","Feces_HF","Feces_CTRL","Feces_PFOS","Feces_LF_CTRL","Feces_LF_PFOS","Feces_HF_CTRL","Feces_HF_PFOS",             "Feces_d0","Feces_d08","Feces_d12","Feces_d16","Feces_d20","Feces_d21","Feces_d0d08","Feces_d08d21",             "Ileum","Ileum_LF","Ileum_HF","Cecum","Cecum_LF","Cecum_HF")
load(paste0("R_objects/Agglomerated_",SUBNAME,".RData"))

# Create sub-folder
  if (!file.exists(paste0("plots/differential_abundance/",SUBNAME))) dir.create(file.path(getwd(), paste0("plots/differential_abundance/",SUBNAME)))

# Filter samples (if needed)
# phy.ge <- subset_samples(phy.ge, material == MTRL)
if (exists("FEED") == TRUE) {
  phy.ge <- subset_samples(phy.ge, feed == FEED)
  print(paste0("Data contains type ",FEED," feed."))
} else {
  print("Data contains both feed LF and HF.")
}
if (exists("DAY") == TRUE) {
  phy.ge <- subset_samples(phy.ge, day %in% DAY)
  print(paste0("Data contains samples from day ",DAY,"."))
} else {
    print("Data contains all days.")
}
if (exists("TREAT") == TRUE) {
  phy.ge <- subset_samples(phy.ge, treatment == TREAT)
  print(paste0("Data contains only ",TREAT," treatment groups."))
} else {
  print("Data contains all treatment groups.")
}

# Filter data
filt <- preDA(data = phy.ge, min.reads = 20, min.samples = 4)

# Run the selected analysis
filt.DA <- DA.ltt(filt, predictor = PDI)

# Evaluate the plot and summary table
table(filt.DA$pval.adj < 0.05)
filt.p5 <- filt.DA[filt.DA$pval.adj < 0.05,]
write.csv(filt.p5,paste0("plots/differential_abundance/",SUBNAME,"/stats_",MTRL,"_",DAY,"_",TREAT,"_",PDI,"_",LVL,".csv"))

if (any(filt.p5$pval.adj > 0)) {
  # Create a subset of the samples
  filt.ra <- transform_sample_counts(filt, function(x) x/sum(x)*100)
  DA.sig <- prune_taxa(filt.DA$Feature[filt.DA$pval.adj < 0.05], x = filt.ra)
  
  # melt the data
  DAm <- psmelt(DA.sig)
  
  # Create plot
  pseudocount <- min(DAm$Abundance[DAm$Abundance != 0])
  p.dif <- ggplot(DAm, aes(x = Genus, y = Abundance+pseudocount, fill = fct_rev(feed_treat))) + geom_boxplot() + scale_y_log10() + coord_flip() + scale_fill_manual(values = params$COL1, name = "Feed", labels = c("HF","LF"), breaks = c("LF_CTRL","HF_CTRL")) + facet_grid(Phylum ~ . , scales = "free_y", space = "free") + theme_bw()
  p.dT <- p.dif + ggtitle(paste0("Differential abundance on ",LVL," level for ",MTRL," ",DAY," ",TREAT))
  print(p.dT)
  
  suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",TREAT,"_",PDI,"_",LVL,".pdf"), p.dT, device = "pdf", units = "mm", width = 350, height = 200, dpi = 300))
  suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",TREAT,"_",PDI,"_",LVL,".png"), p.dT, device = "png", units = "mm", width = 350, height = 200, dpi = 300))
} else {
  if (exists("FEED") == TRUE) {
  message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for feed ",FEED,"."))
  } else if (exists("TREAT") == TRUE) {
  message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for treatment ",TREAT,"."))
  }
}

# clear the environment and release memory
rm(list = ls(all.names = TRUE))
invisible(gc())
Figure
Figure
4.1.2.4.3 FAMILY
params <- readRDS("R_objects/comp_params.RDS")

LVL <- "Family" #c("Species","Genus","Family")
MTRL <- "Feces" #c("Feces","Ileum","Cecum")
DAY <- "d08" #c("d0","d08","d12","d16","d20","d21")
#FEED <- "LF" #c("LF","HF")
TREAT <- "PFOS" #c("CTRL","PFOS")
PDI <- "feed_treat" #c("treatment","feed")
SUBNAME <- "Feces_d08" #c("Feces","Feces_LF","Feces_HF","Feces_CTRL","Feces_PFOS","Feces_LF_CTRL","Feces_LF_PFOS","Feces_HF_CTRL","Feces_HF_PFOS",             "Feces_d0","Feces_d08","Feces_d12","Feces_d16","Feces_d20","Feces_d21","Feces_d0d08","Feces_d08d21",             "Ileum","Ileum_LF","Ileum_HF","Cecum","Cecum_LF","Cecum_HF")
load(paste0("R_objects/Agglomerated_",SUBNAME,".RData"))

# Create sub-folder
  if (!file.exists(paste0("plots/differential_abundance/",SUBNAME))) dir.create(file.path(getwd(), paste0("plots/differential_abundance/",SUBNAME)))

# Filter samples (if needed)
# phy.fa <- subset_samples(phy.fa, material == MTRL)
if (exists("FEED") == TRUE) {
  phy.fa <- subset_samples(phy.fa, feed == FEED)
  print(paste0("Data contains type ",FEED," feed."))
} else {
  print("Data contains both feed LF and HF.")
}
if (exists("DAY") == TRUE) {
  phy.fa <- subset_samples(phy.fa, day %in% DAY)
  print(paste0("Data contains samples from day ",DAY,"."))
} else {
    print("Data contains all days.")
}
if (exists("TREAT") == TRUE) {
  phy.fa <- subset_samples(phy.fa, treatment == TREAT)
  print(paste0("Data contains only ",TREAT," treatment groups."))
} else {
  print("Data contains all treatment groups.")
}

# Filter data
filt <- preDA(data = phy.fa, min.reads = 20, min.samples = 4)

# Run the selected analysis
filt.DA <- DA.ltt(filt, predictor = PDI)

# Evaluate the plot and summary table
table(filt.DA$pval.adj < 0.05)
filt.p5 <- filt.DA[filt.DA$pval.adj < 0.05,]
write.csv(filt.p5,paste0("plots/differential_abundance/",SUBNAME,"/stats_",MTRL,"_",DAY,"_",TREAT,"_",PDI,"_",LVL,".csv"))

if (any(filt.p5$pval.adj > 0)) {
  # Create a subset of the samples
  filt.ra <- transform_sample_counts(filt, function(x) x/sum(x)*100)
  DA.sig <- prune_taxa(filt.DA$Feature[filt.DA$pval.adj < 0.05], x = filt.ra)
  
  # melt the data
  DAm <- psmelt(DA.sig)
  
  # Create plot
  pseudocount <- min(DAm$Abundance[DAm$Abundance != 0])
  p.dif <- ggplot(DAm, aes(x = Family, y = Abundance+pseudocount, fill = fct_rev(feed_treat))) + geom_boxplot() + scale_y_log10() + coord_flip() + scale_fill_manual(values = params$COL1, name = "Feed", labels = c("HF","LF"), breaks = c("LF_CTRL","HF_CTRL")) + facet_grid(Phylum ~ . , scales = "free_y", space = "free") + theme_bw()
  p.dT <- p.dif + ggtitle(paste0("Differential abundance on ",LVL," level for ",MTRL," ",DAY," ",TREAT))
  print(p.dT)
  
  suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",TREAT,"_",PDI,"_",LVL,".pdf"), p.dT, device = "pdf", units = "mm", width = 350, height = 200, dpi = 300))
  suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",TREAT,"_",PDI,"_",LVL,".png"), p.dT, device = "png", units = "mm", width = 350, height = 200, dpi = 300))
} else {
  if (exists("FEED") == TRUE) {
  message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for feed ",FEED,"."))
  } else if (exists("TREAT") == TRUE) {
  message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for treatment ",TREAT,"."))
  }
}

# clear the environment and release memory
rm(list = ls(all.names = TRUE))
invisible(gc())
Figure
Figure

4.1.2.5 CONCLUSIONS

Significant differential abundances were observed between diets but not between treatment groups within each feed group on Day 8.

4.1.3 DAY 12

4.1.3.1 Treatment in LF

Here we take a look at the differential abundance driven by PFOS treatment (CTRL vs. PFOS) in LF.

4.1.3.1.1 SPECIES (NO SIGNIFICANCE)
params <- readRDS("R_objects/comp_params.RDS")

LVL <- "Species" #c("Species","Genus","Family")
MTRL <- "Feces" #c("Feces","Ileum","Cecum")
DAY <- "d12" #c("d0","d08","d12","d16","d20","d21")
FEED <- "LF" #c("LF","HF")
#TREAT <- "CTRL" #c("CTRL","PFOS")
PDI <- "feed_treat" #c("treatment","feed")
SUBNAME <- "Feces_d12" #c("Feces","Feces_LF","Feces_HF","Feces_CTRL","Feces_PFOS","Feces_LF_CTRL","Feces_LF_PFOS","Feces_HF_CTRL","Feces_HF_PFOS",             "Feces_d0","Feces_d08","Feces_d12","Feces_d16","Feces_d20","Feces_d21","Feces_d0d08","Feces_d08d21",             "Ileum","Ileum_LF","Ileum_HF","Cecum","Cecum_LF","Cecum_HF")
load(paste0("R_objects/Agglomerated_",SUBNAME,".RData"))

# Create sub-folder
  if (!file.exists(paste0("plots/differential_abundance/",SUBNAME))) dir.create(file.path(getwd(), paste0("plots/differential_abundance/",SUBNAME)))

# Filter samples (if needed)
if (exists("FEED") == TRUE) {
  phy.sp <- subset_samples(phy.sp, feed == FEED)
  print(paste0("Data contains type ",FEED," feed."))
} else {
  print("Data contains both feed LF and HF.")
}
if (exists("DAY") == TRUE) {
  phy.sp <- subset_samples(phy.sp, day %in% DAY)
  print(paste0("Data contains samples from day ",DAY,"."))
} else {
    print("Data contains all days.")
}
if (exists("TREAT") == TRUE) {
  phy.sp <- subset_samples(phy.sp, treatment == TREAT)
  print(paste0("Data contains only ",TREAT," treatment groups."))
} else {
  print("Data contains all treatment groups.")
}

# Filter data
filt <- preDA(data = phy.sp, min.reads = 20, min.samples = 4)

# Test best method 
filt.test <- testDA(filt, predictor = PDI, effectSize = 10)

# Evaluate the plot and summary table
sum.fil <- summary(filt.test)
write.csv(sum.fil, file = paste0("plots/differential_abundance/",SUBNAME,"/filter_summary_",MTRL,"_",DAY,"_",FEED,"_",PDI,"_",LVL,".csv"))
p.fil <- plot(filt.test)
p.fil
pdf(paste0("plots/differential_abundance/",SUBNAME,"/filter_plot_",MTRL,"_",DAY,"_",FEED,"_",PDI,"_",LVL,".pdf"))
    p.fil
    dev.off()

# Run the selected analysis
filt.DA <- DA.ttc(filt, predictor = PDI)

# Evaluate the plot and summary table
table(filt.DA$pval.adj < 0.05)
filt.p5 <- filt.DA[filt.DA$pval.adj < 0.05,]
write.csv(filt.p5,paste0("plots/differential_abundance/",SUBNAME,"/stats_",MTRL,"_",DAY,"_",FEED,"_",PDI,"_",LVL,".csv"))

if (any(filt.p5$pval.adj > 0)) {
  # Create a subset of the samples
  filt.ra <- transform_sample_counts(filt, function(x) x/sum(x)*100)
  DA.sig <- prune_taxa(filt.DA$Feature[filt.DA$pval.adj < 0.05], x = filt.ra)
  
  # melt the data
  DAm <- psmelt(DA.sig)
  
  # Create plot
  pseudocount <- min(DAm$Abundance[DAm$Abundance != 0])
  p.dif <- ggplot(DAm, aes(x = Species, y = Abundance+pseudocount, fill = fct_rev(feed_treat))) + geom_boxplot() + scale_y_log10() + coord_flip() + scale_fill_manual(values = params$COL1, name = "Treatment", labels = c("Control","PFOS"), breaks = c("LF_CTRL","LF_PFOS")) + facet_grid(Phylum ~ . , scales = "free_y", space = "free") + theme_bw()
  p.dT <- p.dif + ggtitle(paste0("Differential abundance on ",LVL," level for ",MTRL," ",DAY," ",FEED))
  print(p.dT)
  
  suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",FEED,"_",PDI,"_",LVL,".pdf"), p.dT, device = "pdf", units = "mm", width = 350, height = 200, dpi = 300))
  suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",FEED,"_",PDI,"_",LVL,".png"), p.dT, device = "png", units = "mm", width = 350, height = 200, dpi = 300))
} else {
  if (exists("FEED") == TRUE) {
  message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for feed ",FEED,"."))
  } else if (exists("TREAT") == TRUE) {
  message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for treatment ",TREAT,"."))
  }
}

# clear the environment and release memory
rm(list = ls(all.names = TRUE))
invisible(gc())

Result: TTA (t-test)

Having now identified t-test - ALR (tta) to analyse the two groups as the optimal test we will use it to calculate differential abundance. This test is also used for remaining analyses in this section.

4.1.3.1.2 GENUS (NO SIGNIFICANCE)
params <- readRDS("R_objects/comp_params.RDS")

LVL <- "Genus" #c("Species","Genus","Family")
MTRL <- "Feces" #c("Feces","Ileum","Cecum")
DAY <- "d12" #c("d0","d08","d12","d16","d20","d21")
FEED <- "LF" #c("LF","HF")
#TREAT <- "CTRL" #c("CTRL","PFOS")
PDI <- "feed_treat" #c("treatment","feed")
SUBNAME <- "Feces_d12" #c("Feces","Feces_LF","Feces_HF","Feces_CTRL","Feces_PFOS","Feces_LF_CTRL","Feces_LF_PFOS","Feces_HF_CTRL","Feces_HF_PFOS",             "Feces_d0","Feces_d08","Feces_d12","Feces_d16","Feces_d20","Feces_d21","Feces_d0d08","Feces_d08d21",             "Ileum","Ileum_LF","Ileum_HF","Cecum","Cecum_LF","Cecum_HF")
load(paste0("R_objects/Agglomerated_",SUBNAME,".RData"))

# Create sub-folder
  if (!file.exists(paste0("plots/differential_abundance/",SUBNAME))) dir.create(file.path(getwd(), paste0("plots/differential_abundance/",SUBNAME)))

# Filter samples (if needed)
# phy.ge <- subset_samples(phy.ge, material == MTRL)
if (exists("FEED") == TRUE) {
  phy.ge <- subset_samples(phy.ge, feed == FEED)
  print(paste0("Data contains type ",FEED," feed."))
} else {
  print("Data contains both feed LF and HF.")
}
if (exists("DAY") == TRUE) {
  phy.ge <- subset_samples(phy.ge, day %in% DAY)
  print(paste0("Data contains samples from day ",DAY,"."))
} else {
    print("Data contains all days.")
}
if (exists("TREAT") == TRUE) {
  phy.ge <- subset_samples(phy.ge, treatment == TREAT)
  print(paste0("Data contains only ",TREAT," treatment groups."))
} else {
  print("Data contains all treatment groups.")
}

# Filter data
filt <- preDA(data = phy.ge, min.reads = 20, min.samples = 4)

# Run the selected analysis
filt.DA <- DA.ttr(filt, predictor = PDI)

# Evaluate the plot and summary table
table(filt.DA$pval.adj < 0.05)
filt.p5 <- filt.DA[filt.DA$pval.adj < 0.05,]
write.csv(filt.p5,paste0("plots/differential_abundance/",SUBNAME,"/stats_",MTRL,"_",DAY,"_",FEED,"_",PDI,"_",LVL,".csv"))

if (any(filt.p5$pval.adj > 0)) {
  # Create a subset of the samples
  filt.ra <- transform_sample_counts(filt, function(x) x/sum(x)*100)
  DA.sig <- prune_taxa(filt.DA$Feature[filt.DA$pval.adj < 0.05], x = filt.ra)
  
  # melt the data
  DAm <- psmelt(DA.sig)
  
  # Create plot
  pseudocount <- min(DAm$Abundance[DAm$Abundance != 0])
  p.dif <- ggplot(DAm, aes(x = Genus, y = Abundance+pseudocount, fill = fct_rev(feed_treat))) + geom_boxplot() + scale_y_log10() + coord_flip() + scale_fill_manual(values = params$COL1, name = "Treatment", labels = c("Control","PFOS"), breaks = c("LF_CTRL","LF_PFOS")) + facet_grid(Phylum ~ . , scales = "free_y", space = "free") + theme_bw()
  p.dT <- p.dif + ggtitle(paste0("Differential abundance on ",LVL," level for ",MTRL," ",DAY," ",FEED))
  print(p.dT)
  
  suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",FEED,"_",PDI,"_",LVL,".pdf"), p.dT, device = "pdf", units = "mm", width = 350, height = 200, dpi = 300))
  suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",FEED,"_",PDI,"_",LVL,".png"), p.dT, device = "png", units = "mm", width = 350, height = 200, dpi = 300))
} else {
  if (exists("FEED") == TRUE) {
  message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for feed ",FEED,"."))
  } else if (exists("TREAT") == TRUE) {
  message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for treatment ",TREAT,"."))
  }
}

# clear the environment and release memory
rm(list = ls(all.names = TRUE))
invisible(gc())
4.1.3.1.3 FAMILY (NO SIGNIFICANCE)
params <- readRDS("R_objects/comp_params.RDS")

LVL <- "Family" #c("Species","Genus","Family")
MTRL <- "Feces" #c("Feces","Ileum","Cecum")
DAY <- "d12" #c("d0","d08","d12","d16","d20","d21")
FEED <- "LF" #c("LF","HF")
#TREAT <- "CTRL" #c("CTRL","PFOS")
PDI <- "feed_treat" #c("treatment","feed")
SUBNAME <- "Feces_d12" #c("Feces","Feces_LF","Feces_HF","Feces_CTRL","Feces_PFOS","Feces_LF_CTRL","Feces_LF_PFOS","Feces_HF_CTRL","Feces_HF_PFOS",             "Feces_d0","Feces_d08","Feces_d12","Feces_d16","Feces_d20","Feces_d21","Feces_d0d08","Feces_d08d21",             "Ileum","Ileum_LF","Ileum_HF","Cecum","Cecum_LF","Cecum_HF")
load(paste0("R_objects/Agglomerated_",SUBNAME,".RData"))

# Create sub-folder
  if (!file.exists(paste0("plots/differential_abundance/",SUBNAME))) dir.create(file.path(getwd(), paste0("plots/differential_abundance/",SUBNAME)))

# Filter samples (if needed)
# phy.fa <- subset_samples(phy.fa, material == MTRL)
if (exists("FEED") == TRUE) {
  phy.fa <- subset_samples(phy.fa, feed == FEED)
  print(paste0("Data contains type ",FEED," feed."))
} else {
  print("Data contains both feed LF and HF.")
}
if (exists("DAY") == TRUE) {
  phy.fa <- subset_samples(phy.fa, day %in% DAY)
  print(paste0("Data contains samples from day ",DAY,"."))
} else {
    print("Data contains all days.")
}
if (exists("TREAT") == TRUE) {
  phy.fa <- subset_samples(phy.fa, treatment == TREAT)
  print(paste0("Data contains only ",TREAT," treatment groups."))
} else {
  print("Data contains all treatment groups.")
}

# Filter data
filt <- preDA(data = phy.fa, min.reads = 20, min.samples = 4)

# Run the selected analysis
filt.DA <- DA.tta(filt, predictor = PDI)

# Evaluate the plot and summary table
table(filt.DA$pval.adj < 0.05)
filt.p5 <- filt.DA[filt.DA$pval.adj < 0.05,]
write.csv(filt.p5,paste0("plots/differential_abundance/",SUBNAME,"/stats_",MTRL,"_",DAY,"_",FEED,"_",PDI,"_",LVL,".csv"))

if (any(filt.p5$pval.adj > 0)) {
  # Create a subset of the samples
  filt.ra <- transform_sample_counts(filt, function(x) x/sum(x)*100)
  DA.sig <- prune_taxa(filt.DA$Feature[filt.DA$pval.adj < 0.05], x = filt.ra)
  
  # melt the data
  DAm <- psmelt(DA.sig)
  
  # Create plot
  pseudocount <- min(DAm$Abundance[DAm$Abundance != 0])
  p.dif <- ggplot(DAm, aes(x = Family, y = Abundance+pseudocount, fill = fct_rev(feed_treat))) + geom_boxplot() + scale_y_log10() + coord_flip() + scale_fill_manual(values = params$COL1, name = "Treatment", labels = c("Control","PFOS"), breaks = c("LF_CTRL","LF_PFOS")) + facet_grid(Phylum ~ . , scales = "free_y", space = "free") + theme_bw()
  p.dT <- p.dif + ggtitle(paste0("Differential abundance on ",LVL," level for ",MTRL," ",DAY," ",FEED))
  print(p.dT)
  
  suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",FEED,"_",PDI,"_",LVL,".pdf"), p.dT, device = "pdf", units = "mm", width = 350, height = 200, dpi = 300))
  suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",FEED,"_",PDI,"_",LVL,".png"), p.dT, device = "png", units = "mm", width = 350, height = 200, dpi = 300))
} else {
  if (exists("FEED") == TRUE) {
  message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for feed ",FEED,"."))
  } else if (exists("TREAT") == TRUE) {
  message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for treatment ",TREAT,"."))
  }
}

# clear the environment and release memory
rm(list = ls(all.names = TRUE))
invisible(gc())

4.1.3.2 Treatment in HF

Here we take a look at the differential abundance driven by PFOS treatment (CTRL vs. PFOS) in HF. ##### SPECIES (NO SIGNIFICANCE)

params <- readRDS("R_objects/comp_params.RDS")

LVL <- "Species" #c("Species","Genus","Family")
MTRL <- "Feces" #c("Feces","Ileum","Cecum")
DAY <- "d12" #c("d0","d08","d12","d16","d20","d21")
FEED <- "HF" #c("LF","HF")
#TREAT <- "CTRL" #c("CTRL","PFOS")
PDI <- "feed_treat" #c("treatment","feed")
SUBNAME <- "Feces_d12" #c("Feces","Feces_LF","Feces_HF","Feces_CTRL","Feces_PFOS","Feces_LF_CTRL","Feces_LF_PFOS","Feces_HF_CTRL","Feces_HF_PFOS",             "Feces_d0","Feces_d08","Feces_d12","Feces_d16","Feces_d20","Feces_d21","Feces_d0d08","Feces_d08d21",             "Ileum","Ileum_LF","Ileum_HF","Cecum","Cecum_LF","Cecum_HF")
load(paste0("R_objects/Agglomerated_",SUBNAME,".RData"))

# Create sub-folder
  if (!file.exists(paste0("plots/differential_abundance/",SUBNAME))) dir.create(file.path(getwd(), paste0("plots/differential_abundance/",SUBNAME)))

# Filter samples (if needed)
if (exists("FEED") == TRUE) {
  phy.sp <- subset_samples(phy.sp, feed == FEED)
  print(paste0("Data contains type ",FEED," feed."))
} else {
  print("Data contains both feed LF and HF.")
}
if (exists("DAY") == TRUE) {
  phy.sp <- subset_samples(phy.sp, day %in% DAY)
  print(paste0("Data contains samples from day ",DAY,"."))
} else {
    print("Data contains all days.")
}
if (exists("TREAT") == TRUE) {
  phy.sp <- subset_samples(phy.sp, treatment == TREAT)
  print(paste0("Data contains only ",TREAT," treatment groups."))
} else {
  print("Data contains all treatment groups.")
}

# Filter data
filt <- preDA(data = phy.sp, min.reads = 20, min.samples = 4)

# Run the selected analysis
filt.DA <- DA.ttc(filt, predictor = PDI)

# Evaluate the plot and summary table
table(filt.DA$pval.adj < 0.05)
filt.p5 <- filt.DA[filt.DA$pval.adj < 0.05,]
write.csv(filt.p5,paste0("plots/differential_abundance/",SUBNAME,"/stats_",MTRL,"_",DAY,"_",FEED,"_",PDI,"_",LVL,".csv"))

if (any(filt.p5$pval.adj > 0)) {
  # Create a subset of the samples
  filt.ra <- transform_sample_counts(filt, function(x) x/sum(x)*100)
  DA.sig <- prune_taxa(filt.DA$Feature[filt.DA$pval.adj < 0.05], x = filt.ra)
  
  # melt the data
  DAm <- psmelt(DA.sig)
  
  # Create plot
  pseudocount <- min(DAm$Abundance[DAm$Abundance != 0])
  p.dif <- ggplot(DAm, aes(x = Species, y = Abundance+pseudocount, fill = fct_rev(feed_treat))) + geom_boxplot() + scale_y_log10() + coord_flip() + scale_fill_manual(values = params$COL1, name = "Treatment", labels = c("Control","PFOS"), breaks = c("HF_CTRL","HF_PFOS")) + facet_grid(Phylum ~ . , scales = "free_y", space = "free") + theme_bw()
  p.dT <- p.dif + ggtitle(paste0("Differential abundance on ",LVL," level for ",MTRL," ",DAY," ",FEED))
  print(p.dT)
  
  suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",FEED,"_",PDI,"_",LVL,".pdf"), p.dT, device = "pdf", units = "mm", width = 350, height = 200, dpi = 300))
  suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",FEED,"_",PDI,"_",LVL,".png"), p.dT, device = "png", units = "mm", width = 350, height = 200, dpi = 300))
} else {
  if (exists("FEED") == TRUE) {
  message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for feed ",FEED,"."))
  } else if (exists("TREAT") == TRUE) {
  message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for treatment ",TREAT,"."))
  }
}

# clear the environment and release memory
rm(list = ls(all.names = TRUE))
invisible(gc())
4.1.3.2.1 GENUS (NO SIGNIFICANCE)
params <- readRDS("R_objects/comp_params.RDS")

LVL <- "Genus" #c("Species","Genus","Family")
MTRL <- "Feces" #c("Feces","Ileum","Cecum")
DAY <- "d12" #c("d0","d08","d12","d16","d20","d21")
FEED <- "HF" #c("LF","HF")
#TREAT <- "CTRL" #c("CTRL","PFOS")
PDI <- "feed_treat" #c("treatment","feed")
SUBNAME <- "Feces_d12" #c("Feces","Feces_LF","Feces_HF","Feces_CTRL","Feces_PFOS","Feces_LF_CTRL","Feces_LF_PFOS","Feces_HF_CTRL","Feces_HF_PFOS",             "Feces_d0","Feces_d08","Feces_d12","Feces_d16","Feces_d20","Feces_d21","Feces_d0d08","Feces_d08d21",             "Ileum","Ileum_LF","Ileum_HF","Cecum","Cecum_LF","Cecum_HF")
load(paste0("R_objects/Agglomerated_",SUBNAME,".RData"))

# Create sub-folder
  if (!file.exists(paste0("plots/differential_abundance/",SUBNAME))) dir.create(file.path(getwd(), paste0("plots/differential_abundance/",SUBNAME)))

# Filter samples (if needed)
# phy.ge <- subset_samples(phy.ge, material == MTRL)
if (exists("FEED") == TRUE) {
  phy.ge <- subset_samples(phy.ge, feed == FEED)
  print(paste0("Data contains type ",FEED," feed."))
} else {
  print("Data contains both feed LF and HF.")
}
if (exists("DAY") == TRUE) {
  phy.ge <- subset_samples(phy.ge, day %in% DAY)
  print(paste0("Data contains samples from day ",DAY,"."))
} else {
    print("Data contains all days.")
}
if (exists("TREAT") == TRUE) {
  phy.ge <- subset_samples(phy.ge, treatment == TREAT)
  print(paste0("Data contains only ",TREAT," treatment groups."))
} else {
  print("Data contains all treatment groups.")
}

# Filter data
filt <- preDA(data = phy.ge, min.reads = 20, min.samples = 4)

# Run the selected analysis
filt.DA <- DA.ttc(filt, predictor = PDI)

# Evaluate the plot and summary table
table(filt.DA$pval.adj < 0.05)
filt.p5 <- filt.DA[filt.DA$pval.adj < 0.05,]
write.csv(filt.p5,paste0("plots/differential_abundance/",SUBNAME,"/stats_",MTRL,"_",DAY,"_",FEED,"_",PDI,"_",LVL,".csv"))

if (any(filt.p5$pval.adj > 0)) {
  # Create a subset of the samples
  filt.ra <- transform_sample_counts(filt, function(x) x/sum(x)*100)
  DA.sig <- prune_taxa(filt.DA$Feature[filt.DA$pval.adj < 0.05], x = filt.ra)
  
  # melt the data
  DAm <- psmelt(DA.sig)
  
  # Create plot
  pseudocount <- min(DAm$Abundance[DAm$Abundance != 0])
  p.dif <- ggplot(DAm, aes(x = Species, y = Abundance+pseudocount, fill = fct_rev(feed_treat))) + geom_boxplot() + scale_y_log10() + coord_flip() + scale_fill_manual(values = params$COL1, name = "Treatment", labels = c("Control","PFOS"), breaks = c("HF_CTRL","HF_PFOS")) + facet_grid(Phylum ~ . , scales = "free_y", space = "free") + theme_bw()
  p.dT <- p.dif + ggtitle(paste0("Differential abundance on ",LVL," level for ",MTRL," ",DAY," ",FEED))
  print(p.dT)
  
  suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",FEED,"_",PDI,"_",LVL,".pdf"), p.dT, device = "pdf", units = "mm", width = 350, height = 200, dpi = 300))
  suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",FEED,"_",PDI,"_",LVL,".png"), p.dT, device = "png", units = "mm", width = 350, height = 200, dpi = 300))
} else {
  if (exists("FEED") == TRUE) {
  message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for feed ",FEED,"."))
  } else if (exists("TREAT") == TRUE) {
  message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for treatment ",TREAT,"."))
  }
}

# clear the environment and release memory
rm(list = ls(all.names = TRUE))
invisible(gc())
4.1.3.2.2 FAMILY (NO SIGNIFICANCE)
params <- readRDS("R_objects/comp_params.RDS")

LVL <- "Family" #c("Species","Genus","Family")
MTRL <- "Feces" #c("Feces","Ileum","Cecum")
DAY <- "d12" #c("d0","d08","d12","d16","d20","d21")
FEED <- "HF" #c("LF","HF")
#TREAT <- "CTRL" #c("CTRL","PFOS")
PDI <- "feed_treat" #c("treatment","feed")
SUBNAME <- "Feces_d12" #c("Feces","Feces_LF","Feces_HF","Feces_CTRL","Feces_PFOS","Feces_LF_CTRL","Feces_LF_PFOS","Feces_HF_CTRL","Feces_HF_PFOS",             "Feces_d0","Feces_d08","Feces_d12","Feces_d16","Feces_d20","Feces_d21","Feces_d0d08","Feces_d08d21",             "Ileum","Ileum_LF","Ileum_HF","Cecum","Cecum_LF","Cecum_HF")
load(paste0("R_objects/Agglomerated_",SUBNAME,".RData"))

# Create sub-folder
  if (!file.exists(paste0("plots/differential_abundance/",SUBNAME))) dir.create(file.path(getwd(), paste0("plots/differential_abundance/",SUBNAME)))

# Filter samples (if needed)
# phy.fa <- subset_samples(phy.fa, material == MTRL)
if (exists("FEED") == TRUE) {
  phy.fa <- subset_samples(phy.fa, feed == FEED)
  print(paste0("Data contains type ",FEED," feed."))
} else {
  print("Data contains both feed LF and HF.")
}
if (exists("DAY") == TRUE) {
  phy.fa <- subset_samples(phy.fa, day %in% DAY)
  print(paste0("Data contains samples from day ",DAY,"."))
} else {
    print("Data contains all days.")
}
if (exists("TREAT") == TRUE) {
  phy.fa <- subset_samples(phy.fa, treatment == TREAT)
  print(paste0("Data contains only ",TREAT," treatment groups."))
} else {
  print("Data contains all treatment groups.")
}

# Filter data
filt <- preDA(data = phy.fa, min.reads = 20, min.samples = 4)

# Run the selected analysis
filt.DA <- DA.tta(filt, predictor = PDI)

# Evaluate the plot and summary table
table(filt.DA$pval.adj < 0.05)
filt.p5 <- filt.DA[filt.DA$pval.adj < 0.05,]
write.csv(filt.p5,paste0("plots/differential_abundance/",SUBNAME,"/stats_",MTRL,"_",DAY,"_",FEED,"_",PDI,"_",LVL,".csv"))

if (any(filt.p5$pval.adj > 0)) {
  # Create a subset of the samples
  filt.ra <- transform_sample_counts(filt, function(x) x/sum(x)*100)
  DA.sig <- prune_taxa(filt.DA$Feature[filt.DA$pval.adj < 0.05], x = filt.ra)
  
  # melt the data
  DAm <- psmelt(DA.sig)
  
  # Create plot
  pseudocount <- min(DAm$Abundance[DAm$Abundance != 0])
  p.dif <- ggplot(DAm, aes(x = Species, y = Abundance+pseudocount, fill = fct_rev(feed_treat))) + geom_boxplot() + scale_y_log10() + coord_flip() + scale_fill_manual(values = params$COL1, name = "Treatment", labels = c("Control","PFOS"), breaks = c("HF_CTRL","HF_PFOS")) + facet_grid(Phylum ~ . , scales = "free_y", space = "free") + theme_bw()
  p.dT <- p.dif + ggtitle(paste0("Differential abundance on ",LVL," level for ",MTRL," ",DAY," ",FEED))
  print(p.dT)
  
  suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",FEED,"_",PDI,"_",LVL,".pdf"), p.dT, device = "pdf", units = "mm", width = 350, height = 200, dpi = 300))
  suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",FEED,"_",PDI,"_",LVL,".png"), p.dT, device = "png", units = "mm", width = 350, height = 200, dpi = 300))
} else {
  if (exists("FEED") == TRUE) {
  message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for feed ",FEED,"."))
  } else if (exists("TREAT") == TRUE) {
  message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for treatment ",TREAT,"."))
  }
}

# clear the environment and release memory
rm(list = ls(all.names = TRUE))
invisible(gc())

4.1.3.3 Feed in CTRL

Here we investigate the differential abundance driven by feed (No fibre vs. Fibre) in the control treatment group (CTRL).

4.1.3.3.1 SPECIES
params <- readRDS("R_objects/comp_params.RDS")

LVL <- "Species" #c("Species","Genus","Family")
MTRL <- "Feces" #c("Feces","Ileum","Cecum")
DAY <- "d12" #c("d0","d08","d12","d16","d20","d21")
#FEED <- "LF" #c("LF","HF")
TREAT <- "CTRL" #c("CTRL","PFOS")
PDI <- "feed_treat" #c("treatment","feed")
SUBNAME <- "Feces_d12" #c("Feces","Feces_LF","Feces_HF","Feces_CTRL","Feces_PFOS","Feces_LF_CTRL","Feces_LF_PFOS","Feces_HF_CTRL","Feces_HF_PFOS",             "Feces_d0","Feces_d08","Feces_d12","Feces_d16","Feces_d20","Feces_d21","Feces_d0d08","Feces_d08d21",             "Ileum","Ileum_LF","Ileum_HF","Cecum","Cecum_LF","Cecum_HF")
load(paste0("R_objects/Agglomerated_",SUBNAME,".RData"))

# Create sub-folder
  if (!file.exists(paste0("plots/differential_abundance/",SUBNAME))) dir.create(file.path(getwd(), paste0("plots/differential_abundance/",SUBNAME)))

# Filter samples (if needed)
if (exists("FEED") == TRUE) {
  phy.sp <- subset_samples(phy.sp, feed == FEED)
  print(paste0("Data contains type ",FEED," feed."))
} else {
  print("Data contains both feed LF and HF.")
}
if (exists("DAY") == TRUE) {
  phy.sp <- subset_samples(phy.sp, day %in% DAY)
  print(paste0("Data contains samples from day ",DAY,"."))
} else {
    print("Data contains all days.")
}
if (exists("TREAT") == TRUE) {
  phy.sp <- subset_samples(phy.sp, treatment == TREAT)
  print(paste0("Data contains only ",TREAT," treatment groups."))
} else {
  print("Data contains all treatment groups.")
}

# Filter data
filt <- preDA(data = phy.sp, min.reads = 20, min.samples = 4)

# Run the selected analysis
filt.DA <- DA.ttc(filt, predictor = PDI)

# Evaluate the plot and summary table
table(filt.DA$pval.adj < 0.05)
filt.p5 <- filt.DA[filt.DA$pval.adj < 0.05,]
write.csv(filt.p5,paste0("plots/differential_abundance/",SUBNAME,"/stats_",MTRL,"_",DAY,"_",TREAT,"_",PDI,"_",LVL,".csv"))

if (any(filt.p5$pval.adj > 0)) {
  # Create a subset of the samples
  filt.ra <- transform_sample_counts(filt, function(x) x/sum(x)*100)
  DA.sig <- prune_taxa(filt.DA$Feature[filt.DA$pval.adj < 0.05], x = filt.ra)
  
  # melt the data
  DAm <- psmelt(DA.sig)
  
  # Create plot
  pseudocount <- min(DAm$Abundance[DAm$Abundance != 0])
  p.dif <- ggplot(DAm, aes(x = Species, y = Abundance+pseudocount, fill = fct_rev(feed_treat))) + geom_boxplot() + scale_y_log10() + coord_flip() + scale_fill_manual(values = params$COL1, name = "Feed", labels = c("HF","LF"), breaks = c("LF_CTRL","HF_CTRL")) + facet_grid(Phylum ~ . , scales = "free_y", space = "free") + theme_bw()
  p.dT <- p.dif + ggtitle(paste0("Differential abundance on ",LVL," level for ",MTRL," ",DAY," ",TREAT))
  print(p.dT)
  
  suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",TREAT,"_",PDI,"_",LVL,".pdf"), p.dT, device = "pdf", units = "mm", width = 350, height = 200, dpi = 300))
  suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",TREAT,"_",PDI,"_",LVL,".png"), p.dT, device = "png", units = "mm", width = 350, height = 200, dpi = 300))
} else {
  if (exists("FEED") == TRUE) {
  message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for feed ",FEED,"."))
  } else if (exists("TREAT") == TRUE) {
  message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for treatment ",TREAT,"."))
  }
}

# clear the environment and release memory
rm(list = ls(all.names = TRUE))
invisible(gc())
Figure
Figure
4.1.3.3.2 GENUS
params <- readRDS("R_objects/comp_params.RDS")

LVL <- "Genus" #c("Species","Genus","Family")
MTRL <- "Feces" #c("Feces","Ileum","Cecum")
DAY <- "d12" #c("d0","d08","d12","d16","d20","d21")
#FEED <- "LF" #c("LF","HF")
TREAT <- "CTRL" #c("CTRL","PFOS")
PDI <- "feed_treat" #c("treatment","feed")
SUBNAME <- "Feces_d12" #c("Feces","Feces_LF","Feces_HF","Feces_CTRL","Feces_PFOS","Feces_LF_CTRL","Feces_LF_PFOS","Feces_HF_CTRL","Feces_HF_PFOS",             "Feces_d0","Feces_d21","Feces_d12","Feces_d16","Feces_d20","Feces_d21","Feces_d0d08","Feces_d08d21",             "Ileum","Ileum_LF","Ileum_HF","Cecum","Cecum_LF","Cecum_HF")
load(paste0("R_objects/Agglomerated_",SUBNAME,".RData"))

# Create sub-folder
  if (!file.exists(paste0("plots/differential_abundance/",SUBNAME))) dir.create(file.path(getwd(), paste0("plots/differential_abundance/",SUBNAME)))

# Filter samples (if needed)
# phy.ge <- subset_samples(phy.ge, material == MTRL)
if (exists("FEED") == TRUE) {
  phy.ge <- subset_samples(phy.ge, feed == FEED)
  print(paste0("Data contains type ",FEED," feed."))
} else {
  print("Data contains both feed LF and HF.")
}
if (exists("DAY") == TRUE) {
  phy.ge <- subset_samples(phy.ge, day %in% DAY)
  print(paste0("Data contains samples from day ",DAY,"."))
} else {
    print("Data contains all days.")
}
if (exists("TREAT") == TRUE) {
  phy.ge <- subset_samples(phy.ge, treatment == TREAT)
  print(paste0("Data contains only ",TREAT," treatment groups."))
} else {
  print("Data contains all treatment groups.")
}

# Filter data
filt <- preDA(data = phy.ge, min.reads = 20, min.samples = 4)

# Run the selected analysis
filt.DA <- DA.ttc(filt, predictor = PDI)

# Evaluate the plot and summary table
table(filt.DA$pval.adj < 0.05)
filt.p5 <- filt.DA[filt.DA$pval.adj < 0.05,]
write.csv(filt.p5,paste0("plots/differential_abundance/",SUBNAME,"/stats_",MTRL,"_",DAY,"_",TREAT,"_",PDI,"_",LVL,".csv"))

if (any(filt.p5$pval.adj > 0)) {
  # Create a subset of the samples
  filt.ra <- transform_sample_counts(filt, function(x) x/sum(x)*100)
  DA.sig <- prune_taxa(filt.DA$Feature[filt.DA$pval.adj < 0.05], x = filt.ra)
  
  # melt the data
  DAm <- psmelt(DA.sig)
  
  # Create plot
  pseudocount <- min(DAm$Abundance[DAm$Abundance != 0])
  p.dif <- ggplot(DAm, aes(x = Genus, y = Abundance+pseudocount, fill = fct_rev(feed_treat))) + geom_boxplot() + scale_y_log10() + coord_flip() + scale_fill_manual(values = params$COL1, name = "Feed", labels = c("HF","LF"), breaks = c("LF_CTRL","HF_CTRL")) + facet_grid(Phylum ~ . , scales = "free_y", space = "free") + theme_bw()
  p.dT <- p.dif + ggtitle(paste0("Differential abundance on ",LVL," level for ",MTRL," ",DAY," ",TREAT))
  print(p.dT)
  
  suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",TREAT,"_",PDI,"_",LVL,".pdf"), p.dT, device = "pdf", units = "mm", width = 350, height = 200, dpi = 300))
  suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",TREAT,"_",PDI,"_",LVL,".png"), p.dT, device = "png", units = "mm", width = 350, height = 200, dpi = 300))
} else {
  if (exists("FEED") == TRUE) {
  message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for feed ",FEED,"."))
  } else if (exists("TREAT") == TRUE) {
  message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for treatment ",TREAT,"."))
  }
}

# clear the environment and release memory
rm(list = ls(all.names = TRUE))
invisible(gc())
Figure
Figure
4.1.3.3.3 FAMILY (NO SIGNIFICANCE)
params <- readRDS("R_objects/comp_params.RDS")

LVL <- "Family" #c("Species","Genus","Family")
MTRL <- "Feces" #c("Feces","Ileum","Cecum")
DAY <- "d12" #c("d0","d08","d12","d16","d20","d21")
#FEED <- "LF" #c("LF","HF")
TREAT <- "CTRL" #c("CTRL","PFOS")
PDI <- "feed_treat" #c("treatment","feed")
SUBNAME <- "Feces_d12" #c("Feces","Feces_LF","Feces_HF","Feces_CTRL","Feces_PFOS","Feces_LF_CTRL","Feces_LF_PFOS","Feces_HF_CTRL","Feces_HF_PFOS",             "Feces_d0","Feces_d08","Feces_d12","Feces_d16","Feces_d20","Feces_d21","Feces_d0d08","Feces_d08d21",             "Ileum","Ileum_LF","Ileum_HF","Cecum","Cecum_LF","Cecum_HF")
load(paste0("R_objects/Agglomerated_",SUBNAME,".RData"))

# Create sub-folder
  if (!file.exists(paste0("plots/differential_abundance/",SUBNAME))) dir.create(file.path(getwd(), paste0("plots/differential_abundance/",SUBNAME)))

# Filter samples (if needed)
# phy.fa <- subset_samples(phy.fa, material == MTRL)
if (exists("FEED") == TRUE) {
  phy.fa <- subset_samples(phy.fa, feed == FEED)
  print(paste0("Data contains type ",FEED," feed."))
} else {
  print("Data contains both feed LF and HF.")
}
if (exists("DAY") == TRUE) {
  phy.fa <- subset_samples(phy.fa, day %in% DAY)
  print(paste0("Data contains samples from day ",DAY,"."))
} else {
    print("Data contains all days.")
}
if (exists("TREAT") == TRUE) {
  phy.fa <- subset_samples(phy.fa, treatment == TREAT)
  print(paste0("Data contains only ",TREAT," treatment groups."))
} else {
  print("Data contains all treatment groups.")
}

# Filter data
filt <- preDA(data = phy.fa, min.reads = 20, min.samples = 4)

# Run the selected analysis
filt.DA <- DA.ttt(filt, predictor = PDI)

# Evaluate the plot and summary table
table(filt.DA$pval.adj < 0.05)
filt.p5 <- filt.DA[filt.DA$pval.adj < 0.05,]
write.csv(filt.p5,paste0("plots/differential_abundance/",SUBNAME,"/stats_",MTRL,"_",DAY,"_",TREAT,"_",PDI,"_",LVL,".csv"))

if (any(filt.p5$pval.adj > 0)) {
  # Create a subset of the samples
  filt.ra <- transform_sample_counts(filt, function(x) x/sum(x)*100)
  DA.sig <- prune_taxa(filt.DA$Feature[filt.DA$pval.adj < 0.05], x = filt.ra)
  
  # melt the data
  DAm <- psmelt(DA.sig)
  
  # Create plot
  pseudocount <- min(DAm$Abundance[DAm$Abundance != 0])
  p.dif <- ggplot(DAm, aes(x = Family, y = Abundance+pseudocount, fill = fct_rev(feed_treat))) + geom_boxplot() + scale_y_log10() + coord_flip() + scale_fill_manual(values = params$COL1, name = "Feed", labels = c("HF","LF"), breaks = c("LF_CTRL","HF_CTRL")) + facet_grid(Phylum ~ . , scales = "free_y", space = "free") + theme_bw()
  p.dT <- p.dif + ggtitle(paste0("Differential abundance on ",LVL," level for ",MTRL," ",DAY," ",TREAT))
  print(p.dT)
  
  suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",TREAT,"_",PDI,"_",LVL,".pdf"), p.dT, device = "pdf", units = "mm", width = 350, height = 200, dpi = 300))
  suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",TREAT,"_",PDI,"_",LVL,".png"), p.dT, device = "png", units = "mm", width = 350, height = 200, dpi = 300))
} else {
  if (exists("FEED") == TRUE) {
  message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for feed ",FEED,"."))
  } else if (exists("TREAT") == TRUE) {
  message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for treatment ",TREAT,"."))
  }
}

# clear the environment and release memory
rm(list = ls(all.names = TRUE))
invisible(gc())

4.1.3.4 Feed in PFOS

Here we investigate the differential abundance driven by feed (No fibre vs. Fibre) in the PFOS treatment group (PFOS).

4.1.3.4.1 SPECIES
params <- readRDS("R_objects/comp_params.RDS")

LVL <- "Species" #c("Species","Genus","Family")
MTRL <- "Feces" #c("Feces","Ileum","Cecum")
DAY <- "d12" #c("d0","d08","d12","d16","d20","d21")
#FEED <- "LF" #c("LF","HF")
TREAT <- "PFOS" #c("CTRL","PFOS")
PDI <- "feed_treat" #c("treatment","feed")
SUBNAME <- "Feces_d12" #c("Feces","Feces_LF","Feces_HF","Feces_CTRL","Feces_PFOS","Feces_LF_CTRL","Feces_LF_PFOS","Feces_HF_CTRL","Feces_HF_PFOS",             "Feces_d0","Feces_d08","Feces_d12","Feces_d16","Feces_d20","Feces_d21","Feces_d0d08","Feces_d08d21",             "Ileum","Ileum_LF","Ileum_HF","Cecum","Cecum_LF","Cecum_HF")
load(paste0("R_objects/Agglomerated_",SUBNAME,".RData"))

# Create sub-folder
  if (!file.exists(paste0("plots/differential_abundance/",SUBNAME))) dir.create(file.path(getwd(), paste0("plots/differential_abundance/",SUBNAME)))

# Filter samples (if needed)
if (exists("FEED") == TRUE) {
  phy.sp <- subset_samples(phy.sp, feed == FEED)
  print(paste0("Data contains type ",FEED," feed."))
} else {
  print("Data contains both feed LF and HF.")
}
if (exists("DAY") == TRUE) {
  phy.sp <- subset_samples(phy.sp, day %in% DAY)
  print(paste0("Data contains samples from day ",DAY,"."))
} else {
    print("Data contains all days.")
}
if (exists("TREAT") == TRUE) {
  phy.sp <- subset_samples(phy.sp, treatment == TREAT)
  print(paste0("Data contains only ",TREAT," treatment groups."))
} else {
  print("Data contains all treatment groups.")
}

# Filter data
filt <- preDA(data = phy.sp, min.reads = 20, min.samples = 4)

# Run the selected analysis
filt.DA <- DA.ttc(filt, predictor = PDI)

# Evaluate the plot and summary table
table(filt.DA$pval.adj < 0.05)
filt.p5 <- filt.DA[filt.DA$pval.adj < 0.05,]
write.csv(filt.p5,paste0("plots/differential_abundance/",SUBNAME,"/stats_",MTRL,"_",DAY,"_",TREAT,"_",PDI,"_",LVL,".csv"))

if (any(filt.p5$pval.adj > 0)) {
  # Create a subset of the samples
  filt.ra <- transform_sample_counts(filt, function(x) x/sum(x)*100)
  DA.sig <- prune_taxa(filt.DA$Feature[filt.DA$pval.adj < 0.05], x = filt.ra)
  
  # melt the data
  DAm <- psmelt(DA.sig)
  
  # Create plot
  pseudocount <- min(DAm$Abundance[DAm$Abundance != 0])
  p.dif <- ggplot(DAm, aes(x = Species, y = Abundance+pseudocount, fill = fct_rev(feed_treat))) + geom_boxplot() + scale_y_log10() + coord_flip() + scale_fill_manual(values = params$COL1, name = "Feed", labels = c("HF","LF"), breaks = c("LF_PFOS","HF_PFOS")) + facet_grid(Phylum ~ . , scales = "free_y", space = "free") + theme_bw()
  p.dT <- p.dif + ggtitle(paste0("Differential abundance on ",LVL," level for ",MTRL," ",DAY," ",TREAT))
  print(p.dT)
  
  suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",TREAT,"_",PDI,"_",LVL,".pdf"), p.dT, device = "pdf", units = "mm", width = 350, height = 200, dpi = 300))
  suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",TREAT,"_",PDI,"_",LVL,".png"), p.dT, device = "png", units = "mm", width = 350, height = 200, dpi = 300))
} else {
  if (exists("FEED") == TRUE) {
  message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for feed ",FEED,"."))
  } else if (exists("TREAT") == TRUE) {
  message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for treatment ",TREAT,"."))
  }
}

# clear the environment and release memory
rm(list = ls(all.names = TRUE))
invisible(gc())
Figure
Figure
4.1.3.4.2 GENUS
params <- readRDS("R_objects/comp_params.RDS")

LVL <- "Genus" #c("Species","Genus","Family")
MTRL <- "Feces" #c("Feces","Ileum","Cecum")
DAY <- "d12" #c("d0","d08","d12","d16","d20","d21")
#FEED <- "LF" #c("LF","HF")
TREAT <- "PFOS" #c("CTRL","PFOS")
PDI <- "feed_treat" #c("treatment","feed")
SUBNAME <- "Feces_d12" #c("Feces","Feces_LF","Feces_HF","Feces_CTRL","Feces_PFOS","Feces_LF_CTRL","Feces_LF_PFOS","Feces_HF_CTRL","Feces_HF_PFOS",             "Feces_d0","Feces_d08","Feces_d12","Feces_d16","Feces_d20","Feces_d21","Feces_d0d08","Feces_d08d21",             "Ileum","Ileum_LF","Ileum_HF","Cecum","Cecum_LF","Cecum_HF")
load(paste0("R_objects/Agglomerated_",SUBNAME,".RData"))

# Create sub-folder
  if (!file.exists(paste0("plots/differential_abundance/",SUBNAME))) dir.create(file.path(getwd(), paste0("plots/differential_abundance/",SUBNAME)))

# Filter samples (if needed)
# phy.ge <- subset_samples(phy.ge, material == MTRL)
if (exists("FEED") == TRUE) {
  phy.ge <- subset_samples(phy.ge, feed == FEED)
  print(paste0("Data contains type ",FEED," feed."))
} else {
  print("Data contains both feed LF and HF.")
}
if (exists("DAY") == TRUE) {
  phy.ge <- subset_samples(phy.ge, day %in% DAY)
  print(paste0("Data contains samples from day ",DAY,"."))
} else {
    print("Data contains all days.")
}
if (exists("TREAT") == TRUE) {
  phy.ge <- subset_samples(phy.ge, treatment == TREAT)
  print(paste0("Data contains only ",TREAT," treatment groups."))
} else {
  print("Data contains all treatment groups.")
}

# Filter data
filt <- preDA(data = phy.ge, min.reads = 20, min.samples = 4)

# Run the selected analysis
filt.DA <- DA.ttc(filt, predictor = PDI)

# Evaluate the plot and summary table
table(filt.DA$pval.adj < 0.05)
filt.p5 <- filt.DA[filt.DA$pval.adj < 0.05,]
write.csv(filt.p5,paste0("plots/differential_abundance/",SUBNAME,"/stats_",MTRL,"_",DAY,"_",TREAT,"_",PDI,"_",LVL,".csv"))

if (any(filt.p5$pval.adj > 0)) {
  # Create a subset of the samples
  filt.ra <- transform_sample_counts(filt, function(x) x/sum(x)*100)
  DA.sig <- prune_taxa(filt.DA$Feature[filt.DA$pval.adj < 0.05], x = filt.ra)
  
  # melt the data
  DAm <- psmelt(DA.sig)
  
  # Create plot
  pseudocount <- min(DAm$Abundance[DAm$Abundance != 0])
  p.dif <- ggplot(DAm, aes(x = Genus, y = Abundance+pseudocount, fill = fct_rev(feed_treat))) + geom_boxplot() + scale_y_log10() + coord_flip() + scale_fill_manual(values = params$COL1, name = "Feed", labels = c("HF","LF"), breaks = c("LF_PFOS","HF_PFOS")) + facet_grid(Phylum ~ . , scales = "free_y", space = "free") + theme_bw()
  p.dT <- p.dif + ggtitle(paste0("Differential abundance on ",LVL," level for ",MTRL," ",DAY," ",TREAT))
  print(p.dT)
  
  suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",TREAT,"_",PDI,"_",LVL,".pdf"), p.dT, device = "pdf", units = "mm", width = 350, height = 200, dpi = 300))
  suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",TREAT,"_",PDI,"_",LVL,".png"), p.dT, device = "png", units = "mm", width = 350, height = 200, dpi = 300))
} else {
  if (exists("FEED") == TRUE) {
  message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for feed ",FEED,"."))
  } else if (exists("TREAT") == TRUE) {
  message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for treatment ",TREAT,"."))
  }
}

# clear the environment and release memory
rm(list = ls(all.names = TRUE))
invisible(gc())
Figure
Figure
4.1.3.4.3 FAMILY
params <- readRDS("R_objects/comp_params.RDS")

LVL <- "Family" #c("Species","Genus","Family")
MTRL <- "Feces" #c("Feces","Ileum","Cecum")
DAY <- "d12" #c("d0","d08","d12","d16","d20","d21")
#FEED <- "LF" #c("LF","HF")
TREAT <- "PFOS" #c("CTRL","PFOS")
PDI <- "feed_treat" #c("treatment","feed")
SUBNAME <- "Feces_d12" #c("Feces","Feces_LF","Feces_HF","Feces_CTRL","Feces_PFOS","Feces_LF_CTRL","Feces_LF_PFOS","Feces_HF_CTRL","Feces_HF_PFOS",             "Feces_d0","Feces_d08","Feces_d12","Feces_d16","Feces_d20","Feces_d21","Feces_d0d08","Feces_d08d21",             "Ileum","Ileum_LF","Ileum_HF","Cecum","Cecum_LF","Cecum_HF")
load(paste0("R_objects/Agglomerated_",SUBNAME,".RData"))

# Create sub-folder
  if (!file.exists(paste0("plots/differential_abundance/",SUBNAME))) dir.create(file.path(getwd(), paste0("plots/differential_abundance/",SUBNAME)))

# Filter samples (if needed)
# phy.fa <- subset_samples(phy.fa, material == MTRL)
if (exists("FEED") == TRUE) {
  phy.fa <- subset_samples(phy.fa, feed == FEED)
  print(paste0("Data contains type ",FEED," feed."))
} else {
  print("Data contains both feed LF and HF.")
}
if (exists("DAY") == TRUE) {
  phy.fa <- subset_samples(phy.fa, day %in% DAY)
  print(paste0("Data contains samples from day ",DAY,"."))
} else {
    print("Data contains all days.")
}
if (exists("TREAT") == TRUE) {
  phy.fa <- subset_samples(phy.fa, treatment == TREAT)
  print(paste0("Data contains only ",TREAT," treatment groups."))
} else {
  print("Data contains all treatment groups.")
}

# Filter data
filt <- preDA(data = phy.fa, min.reads = 20, min.samples = 4)

# Run the selected analysis
filt.DA <- DA.ltt(filt, predictor = PDI)

# Evaluate the plot and summary table
table(filt.DA$pval.adj < 0.05)
filt.p5 <- filt.DA[filt.DA$pval.adj < 0.05,]
write.csv(filt.p5,paste0("plots/differential_abundance/",SUBNAME,"/stats_",MTRL,"_",DAY,"_",TREAT,"_",PDI,"_",LVL,".csv"))

if (any(filt.p5$pval.adj > 0)) {
  # Create a subset of the samples
  filt.ra <- transform_sample_counts(filt, function(x) x/sum(x)*100)
  DA.sig <- prune_taxa(filt.DA$Feature[filt.DA$pval.adj < 0.05], x = filt.ra)
  
  # melt the data
  DAm <- psmelt(DA.sig)
  
  # Create plot
  pseudocount <- min(DAm$Abundance[DAm$Abundance != 0])
  p.dif <- ggplot(DAm, aes(x = Family, y = Abundance+pseudocount, fill = fct_rev(feed_treat))) + geom_boxplot() + scale_y_log10() + coord_flip() + scale_fill_manual(values = params$COL1, name = "Feed", labels = c("HF","LF"), breaks = c("LF_PFOS","HF_PFOS")) + facet_grid(Phylum ~ . , scales = "free_y", space = "free") + theme_bw()
  p.dT <- p.dif + ggtitle(paste0("Differential abundance on ",LVL," level for ",MTRL," ",DAY," ",TREAT))
  print(p.dT)
  
  suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",TREAT,"_",PDI,"_",LVL,".pdf"), p.dT, device = "pdf", units = "mm", width = 350, height = 200, dpi = 300))
  suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",TREAT,"_",PDI,"_",LVL,".png"), p.dT, device = "png", units = "mm", width = 350, height = 200, dpi = 300))
} else {
  if (exists("FEED") == TRUE) {
  message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for feed ",FEED,"."))
  } else if (exists("TREAT") == TRUE) {
  message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for treatment ",TREAT,"."))
  }
}

# clear the environment and release memory
rm(list = ls(all.names = TRUE))
invisible(gc())
Figure
Figure

4.1.3.5 CONCLUSIONS

Significant differential abundances were observed between diets but not between treatment groups within each feed group on Day 12.

4.1.4 DAY 16

4.1.4.1 Treatment in LF

Here we take a look at the differential abundance driven by PFOS treatment (CTRL vs. PFOS) in LF.

4.1.4.1.1 SPECIES (NO SIGNIFICANCE)
params <- readRDS("R_objects/comp_params.RDS")

LVL <- "Species" #c("Species","Genus","Family")
MTRL <- "Feces" #c("Feces","Ileum","Cecum")
DAY <- "d16" #c("d0","d08","d12","d16","d20","d21")
FEED <- "LF" #c("LF","HF")
#TREAT <- "CTRL" #c("CTRL","PFOS")
PDI <- "feed_treat" #c("treatment","feed")
SUBNAME <- "Feces_d16" #c("Feces","Feces_LF","Feces_HF","Feces_CTRL","Feces_PFOS","Feces_LF_CTRL","Feces_LF_PFOS","Feces_HF_CTRL","Feces_HF_PFOS",             "Feces_d0","Feces_d08","Feces_d12","Feces_d16","Feces_d20","Feces_d21","Feces_d0d08","Feces_d08d21",             "Ileum","Ileum_LF","Ileum_HF","Cecum","Cecum_LF","Cecum_HF")
load(paste0("R_objects/Agglomerated_",SUBNAME,".RData"))

# Create sub-folder
  if (!file.exists(paste0("plots/differential_abundance/",SUBNAME))) dir.create(file.path(getwd(), paste0("plots/differential_abundance/",SUBNAME)))

# Filter samples (if needed)
if (exists("FEED") == TRUE) {
  phy.sp <- subset_samples(phy.sp, feed == FEED)
  print(paste0("Data contains type ",FEED," feed."))
} else {
  print("Data contains both feed LF and HF.")
}
if (exists("DAY") == TRUE) {
  phy.sp <- subset_samples(phy.sp, day %in% DAY)
  print(paste0("Data contains samples from day ",DAY,"."))
} else {
    print("Data contains all days.")
}
if (exists("TREAT") == TRUE) {
  phy.sp <- subset_samples(phy.sp, treatment == TREAT)
  print(paste0("Data contains only ",TREAT," treatment groups."))
} else {
  print("Data contains all treatment groups.")
}

# Filter data
filt <- preDA(data = phy.sp, min.reads = 20, min.samples = 4)

# Test best method 
filt.test <- testDA(filt, predictor = PDI, effectSize = 10)

# Evaluate the plot and summary table
sum.fil <- summary(filt.test)
write.csv(sum.fil, file = paste0("plots/differential_abundance/",SUBNAME,"/filter_summary_",MTRL,"_",DAY,"_",FEED,"_",PDI,"_",LVL,".csv"))
p.fil <- plot(filt.test)
p.fil
pdf(paste0("plots/differential_abundance/",SUBNAME,"/filter_plot_",MTRL,"_",DAY,"_",FEED,"_",PDI,"_",LVL,".pdf"))
    p.fil
    dev.off()

# Run the selected analysis
filt.DA <- DA.tta(filt, predictor = PDI)

# Evaluate the plot and summary table
table(filt.DA$pval.adj < 0.05)
filt.p5 <- filt.DA[filt.DA$pval.adj < 0.05,]
write.csv(filt.p5,paste0("plots/differential_abundance/",SUBNAME,"/stats_",MTRL,"_",DAY,"_",FEED,"_",PDI,"_",LVL,".csv"))

if (any(filt.p5$pval.adj > 0)) {
  # Create a subset of the samples
  filt.ra <- transform_sample_counts(filt, function(x) x/sum(x)*100)
  DA.sig <- prune_taxa(filt.DA$Feature[filt.DA$pval.adj < 0.05], x = filt.ra)
  
  # melt the data
  DAm <- psmelt(DA.sig)
  
  # Create plot
  pseudocount <- min(DAm$Abundance[DAm$Abundance != 0])
  p.dif <- ggplot(DAm, aes(x = Species, y = Abundance+pseudocount, fill = fct_rev(feed_treat))) + geom_boxplot() + scale_y_log10() + coord_flip() + scale_fill_manual(values = params$COL1, name = "Treatment", labels = c("Control","PFOS"), breaks = c("LF_CTRL","LF_PFOS")) + facet_grid(Phylum ~ . , scales = "free_y", space = "free") + theme_bw()
  p.dT <- p.dif + ggtitle(paste0("Differential abundance on ",LVL," level for ",MTRL," ",DAY," ",FEED))
  print(p.dT)
  
  suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",FEED,"_",PDI,"_",LVL,".pdf"), p.dT, device = "pdf", units = "mm", width = 350, height = 200, dpi = 300))
  suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",FEED,"_",PDI,"_",LVL,".png"), p.dT, device = "png", units = "mm", width = 350, height = 200, dpi = 300))
} else {
  if (exists("FEED") == TRUE) {
  message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for feed ",FEED,"."))
  } else if (exists("TREAT") == TRUE) {
  message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for treatment ",TREAT,"."))
  }
}

# clear the environment and release memory
rm(list = ls(all.names = TRUE))
invisible(gc())
4.1.4.1.2 GENUS (NO SIGNIFICANCE)
params <- readRDS("R_objects/comp_params.RDS")

LVL <- "Genus" #c("Species","Genus","Family")
MTRL <- "Feces" #c("Feces","Ileum","Cecum")
DAY <- "d16" #c("d0","d08","d12","d16","d20","d21")
FEED <- "LF" #c("LF","HF")
#TREAT <- "CTRL" #c("CTRL","PFOS")
PDI <- "feed_treat" #c("treatment","feed")
SUBNAME <- "Feces_d16" #c("Feces","Feces_LF","Feces_HF","Feces_CTRL","Feces_PFOS","Feces_LF_CTRL","Feces_LF_PFOS","Feces_HF_CTRL","Feces_HF_PFOS",             "Feces_d0","Feces_d08","Feces_d12","Feces_d16","Feces_d20","Feces_d21","Feces_d0d08","Feces_d08d21",             "Ileum","Ileum_LF","Ileum_HF","Cecum","Cecum_LF","Cecum_HF")
load(paste0("R_objects/Agglomerated_",SUBNAME,".RData"))

# Create sub-folder
  if (!file.exists(paste0("plots/differential_abundance/",SUBNAME))) dir.create(file.path(getwd(), paste0("plots/differential_abundance/",SUBNAME)))

# Filter samples (if needed)
# phy.ge <- subset_samples(phy.ge, material == MTRL)
if (exists("FEED") == TRUE) {
  phy.ge <- subset_samples(phy.ge, feed == FEED)
  print(paste0("Data contains type ",FEED," feed."))
} else {
  print("Data contains both feed LF and HF.")
}
if (exists("DAY") == TRUE) {
  phy.ge <- subset_samples(phy.ge, day %in% DAY)
  print(paste0("Data contains samples from day ",DAY,"."))
} else {
    print("Data contains all days.")
}
if (exists("TREAT") == TRUE) {
  phy.ge <- subset_samples(phy.ge, treatment == TREAT)
  print(paste0("Data contains only ",TREAT," treatment groups."))
} else {
  print("Data contains all treatment groups.")
}

# Filter data
filt <- preDA(data = phy.ge, min.reads = 20, min.samples = 4)

# Run the selected analysis
filt.DA <- DA.tta(filt, predictor = PDI)

# Evaluate the plot and summary table
table(filt.DA$pval.adj < 0.05)
filt.p5 <- filt.DA[filt.DA$pval.adj < 0.05,]
write.csv(filt.p5,paste0("plots/differential_abundance/",SUBNAME,"/stats_",MTRL,"_",DAY,"_",FEED,"_",PDI,"_",LVL,".csv"))

if (any(filt.p5$pval.adj > 0)) {
  # Create a subset of the samples
  filt.ra <- transform_sample_counts(filt, function(x) x/sum(x)*100)
  DA.sig <- prune_taxa(filt.DA$Feature[filt.DA$pval.adj < 0.05], x = filt.ra)
  
  # melt the data
  DAm <- psmelt(DA.sig)
  
  # Create plot
  pseudocount <- min(DAm$Abundance[DAm$Abundance != 0])
  p.dif <- ggplot(DAm, aes(x = Genus, y = Abundance+pseudocount, fill = fct_rev(feed_treat))) + geom_boxplot() + scale_y_log10() + coord_flip() + scale_fill_manual(values = params$COL1, name = "Treatment", labels = c("Control","PFOS"), breaks = c("LF_CTRL","LF_PFOS")) + facet_grid(Phylum ~ . , scales = "free_y", space = "free") + theme_bw()
  p.dT <- p.dif + ggtitle(paste0("Differential abundance on ",LVL," level for ",MTRL," ",DAY," ",FEED))
  print(p.dT)
  
  suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",FEED,"_",PDI,"_",LVL,".pdf"), p.dT, device = "pdf", units = "mm", width = 350, height = 200, dpi = 300))
  suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",FEED,"_",PDI,"_",LVL,".png"), p.dT, device = "png", units = "mm", width = 350, height = 200, dpi = 300))
} else {
  if (exists("FEED") == TRUE) {
  message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for feed ",FEED,"."))
  } else if (exists("TREAT") == TRUE) {
  message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for treatment ",TREAT,"."))
  }
}

# clear the environment and release memory
rm(list = ls(all.names = TRUE))
invisible(gc())
4.1.4.1.3 FAMILY (NO SIGNIFICANCE)
params <- readRDS("R_objects/comp_params.RDS")

LVL <- "Family" #c("Species","Genus","Family")
MTRL <- "Feces" #c("Feces","Ileum","Cecum")
DAY <- "d16" #c("d0","d08","d12","d16","d20","d21")
FEED <- "LF" #c("LF","HF")
#TREAT <- "CTRL" #c("CTRL","PFOS")
PDI <- "feed_treat" #c("treatment","feed")
SUBNAME <- "Feces_d16" #c("Feces","Feces_LF","Feces_HF","Feces_CTRL","Feces_PFOS","Feces_LF_CTRL","Feces_LF_PFOS","Feces_HF_CTRL","Feces_HF_PFOS",             "Feces_d0","Feces_d08","Feces_d12","Feces_d16","Feces_d20","Feces_d21","Feces_d0d08","Feces_d08d21",             "Ileum","Ileum_LF","Ileum_HF","Cecum","Cecum_LF","Cecum_HF")
load(paste0("R_objects/Agglomerated_",SUBNAME,".RData"))

# Create sub-folder
  if (!file.exists(paste0("plots/differential_abundance/",SUBNAME))) dir.create(file.path(getwd(), paste0("plots/differential_abundance/",SUBNAME)))

# Filter samples (if needed)
# phy.fa <- subset_samples(phy.fa, material == MTRL)
if (exists("FEED") == TRUE) {
  phy.fa <- subset_samples(phy.fa, feed == FEED)
  print(paste0("Data contains type ",FEED," feed."))
} else {
  print("Data contains both feed LF and HF.")
}
if (exists("DAY") == TRUE) {
  phy.fa <- subset_samples(phy.fa, day %in% DAY)
  print(paste0("Data contains samples from day ",DAY,"."))
} else {
    print("Data contains all days.")
}
if (exists("TREAT") == TRUE) {
  phy.fa <- subset_samples(phy.fa, treatment == TREAT)
  print(paste0("Data contains only ",TREAT," treatment groups."))
} else {
  print("Data contains all treatment groups.")
}

# Filter data
filt <- preDA(data = phy.fa, min.reads = 20, min.samples = 4)

# Run the selected analysis
filt.DA <- DA.tta(filt, predictor = PDI)

# Evaluate the plot and summary table
table(filt.DA$pval.adj < 0.05)
filt.p5 <- filt.DA[filt.DA$pval.adj < 0.05,]
write.csv(filt.p5,paste0("plots/differential_abundance/",SUBNAME,"/stats_",MTRL,"_",DAY,"_",FEED,"_",PDI,"_",LVL,".csv"))

if (any(filt.p5$pval.adj > 0)) {
  # Create a subset of the samples
  filt.ra <- transform_sample_counts(filt, function(x) x/sum(x)*100)
  DA.sig <- prune_taxa(filt.DA$Feature[filt.DA$pval.adj < 0.05], x = filt.ra)
  
  # melt the data
  DAm <- psmelt(DA.sig)
  
  # Create plot
  pseudocount <- min(DAm$Abundance[DAm$Abundance != 0])
  p.dif <- ggplot(DAm, aes(x = Family, y = Abundance+pseudocount, fill = fct_rev(feed_treat))) + geom_boxplot() + scale_y_log10() + coord_flip() + scale_fill_manual(values = params$COL1, name = "Treatment", labels = c("Control","PFOS"), breaks = c("LF_CTRL","LF_PFOS")) + facet_grid(Phylum ~ . , scales = "free_y", space = "free") + theme_bw()
  p.dT <- p.dif + ggtitle(paste0("Differential abundance on ",LVL," level for ",MTRL," ",DAY," ",FEED))
  print(p.dT)
  
  suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",FEED,"_",PDI,"_",LVL,".pdf"), p.dT, device = "pdf", units = "mm", width = 350, height = 200, dpi = 300))
  suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",FEED,"_",PDI,"_",LVL,".png"), p.dT, device = "png", units = "mm", width = 350, height = 200, dpi = 300))
} else {
  if (exists("FEED") == TRUE) {
  message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for feed ",FEED,"."))
  } else if (exists("TREAT") == TRUE) {
  message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for treatment ",TREAT,"."))
  }
}

# clear the environment and release memory
rm(list = ls(all.names = TRUE))
invisible(gc())

4.1.4.2 Treatment in HF

Here we take a look at the differential abundance driven by PFOS treatment (CTRL vs. PFOS) in HF. ##### SPECIES (NO SIGNIFICANCE)

params <- readRDS("R_objects/comp_params.RDS")

LVL <- "Species" #c("Species","Genus","Family")
MTRL <- "Feces" #c("Feces","Ileum","Cecum")
DAY <- "d16" #c("d0","d08","d12","d16","d20","d21")
FEED <- "HF" #c("LF","HF")
#TREAT <- "CTRL" #c("CTRL","PFOS")
PDI <- "feed_treat" #c("treatment","feed")
SUBNAME <- "Feces_d16" #c("Feces","Feces_LF","Feces_HF","Feces_CTRL","Feces_PFOS","Feces_LF_CTRL","Feces_LF_PFOS","Feces_HF_CTRL","Feces_HF_PFOS",             "Feces_d0","Feces_d08","Feces_d12","Feces_d16","Feces_d20","Feces_d21","Feces_d0d08","Feces_d08d21",             "Ileum","Ileum_LF","Ileum_HF","Cecum","Cecum_LF","Cecum_HF")
load(paste0("R_objects/Agglomerated_",SUBNAME,".RData"))

# Create sub-folder
  if (!file.exists(paste0("plots/differential_abundance/",SUBNAME))) dir.create(file.path(getwd(), paste0("plots/differential_abundance/",SUBNAME)))

# Filter samples (if needed)
if (exists("FEED") == TRUE) {
  phy.sp <- subset_samples(phy.sp, feed == FEED)
  print(paste0("Data contains type ",FEED," feed."))
} else {
  print("Data contains both feed LF and HF.")
}
if (exists("DAY") == TRUE) {
  phy.sp <- subset_samples(phy.sp, day %in% DAY)
  print(paste0("Data contains samples from day ",DAY,"."))
} else {
    print("Data contains all days.")
}
if (exists("TREAT") == TRUE) {
  phy.sp <- subset_samples(phy.sp, treatment == TREAT)
  print(paste0("Data contains only ",TREAT," treatment groups."))
} else {
  print("Data contains all treatment groups.")
}

# Filter data
filt <- preDA(data = phy.sp, min.reads = 20, min.samples = 4)

# Run the selected analysis
filt.DA <- DA.tta(filt, predictor = PDI)

# Evaluate the plot and summary table
table(filt.DA$pval.adj < 0.05)
filt.p5 <- filt.DA[filt.DA$pval.adj < 0.05,]
write.csv(filt.p5,paste0("plots/differential_abundance/",SUBNAME,"/stats_",MTRL,"_",DAY,"_",FEED,"_",PDI,"_",LVL,".csv"))

if (any(filt.p5$pval.adj > 0)) {
  # Create a subset of the samples
  filt.ra <- transform_sample_counts(filt, function(x) x/sum(x)*100)
  DA.sig <- prune_taxa(filt.DA$Feature[filt.DA$pval.adj < 0.05], x = filt.ra)
  
  # melt the data
  DAm <- psmelt(DA.sig)
  
  # Create plot
  pseudocount <- min(DAm$Abundance[DAm$Abundance != 0])
  p.dif <- ggplot(DAm, aes(x = Species, y = Abundance+pseudocount, fill = fct_rev(feed_treat))) + geom_boxplot() + scale_y_log10() + coord_flip() + scale_fill_manual(values = params$COL1, name = "Treatment", labels = c("Control","PFOS"), breaks = c("HF_CTRL","HF_PFOS")) + facet_grid(Phylum ~ . , scales = "free_y", space = "free") + theme_bw()
  p.dT <- p.dif + ggtitle(paste0("Differential abundance on ",LVL," level for ",MTRL," ",DAY," ",FEED))
  print(p.dT)
  
  suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",FEED,"_",PDI,"_",LVL,".pdf"), p.dT, device = "pdf", units = "mm", width = 350, height = 200, dpi = 300))
  suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",FEED,"_",PDI,"_",LVL,".png"), p.dT, device = "png", units = "mm", width = 350, height = 200, dpi = 300))
} else {
  if (exists("FEED") == TRUE) {
  message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for feed ",FEED,"."))
  } else if (exists("TREAT") == TRUE) {
  message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for treatment ",TREAT,"."))
  }
}

# clear the environment and release memory
rm(list = ls(all.names = TRUE))
invisible(gc())
4.1.4.2.1 GENUS (NO SIGNIFICANCE)
params <- readRDS("R_objects/comp_params.RDS")

LVL <- "Genus" #c("Species","Genus","Family")
MTRL <- "Feces" #c("Feces","Ileum","Cecum")
DAY <- "d16" #c("d0","d08","d12","d16","d20","d21")
FEED <- "HF" #c("LF","HF")
#TREAT <- "CTRL" #c("CTRL","PFOS")
PDI <- "feed_treat" #c("treatment","feed")
SUBNAME <- "Feces_d16" #c("Feces","Feces_LF","Feces_HF","Feces_CTRL","Feces_PFOS","Feces_LF_CTRL","Feces_LF_PFOS","Feces_HF_CTRL","Feces_HF_PFOS",             "Feces_d0","Feces_d08","Feces_d12","Feces_d16","Feces_d20","Feces_d21","Feces_d0d08","Feces_d08d21",             "Ileum","Ileum_LF","Ileum_HF","Cecum","Cecum_LF","Cecum_HF")
load(paste0("R_objects/Agglomerated_",SUBNAME,".RData"))

# Create sub-folder
  if (!file.exists(paste0("plots/differential_abundance/",SUBNAME))) dir.create(file.path(getwd(), paste0("plots/differential_abundance/",SUBNAME)))

# Filter samples (if needed)
# phy.ge <- subset_samples(phy.ge, material == MTRL)
if (exists("FEED") == TRUE) {
  phy.ge <- subset_samples(phy.ge, feed == FEED)
  print(paste0("Data contains type ",FEED," feed."))
} else {
  print("Data contains both feed LF and HF.")
}
if (exists("DAY") == TRUE) {
  phy.ge <- subset_samples(phy.ge, day %in% DAY)
  print(paste0("Data contains samples from day ",DAY,"."))
} else {
    print("Data contains all days.")
}
if (exists("TREAT") == TRUE) {
  phy.ge <- subset_samples(phy.ge, treatment == TREAT)
  print(paste0("Data contains only ",TREAT," treatment groups."))
} else {
  print("Data contains all treatment groups.")
}

# Filter data
filt <- preDA(data = phy.ge, min.reads = 20, min.samples = 4)

# Run the selected analysis
filt.DA <- DA.tta(filt, predictor = PDI)

# Evaluate the plot and summary table
table(filt.DA$pval.adj < 0.05)
filt.p5 <- filt.DA[filt.DA$pval.adj < 0.05,]
write.csv(filt.p5,paste0("plots/differential_abundance/",SUBNAME,"/stats_",MTRL,"_",DAY,"_",FEED,"_",PDI,"_",LVL,".csv"))

if (any(filt.p5$pval.adj > 0)) {
  # Create a subset of the samples
  filt.ra <- transform_sample_counts(filt, function(x) x/sum(x)*100)
  DA.sig <- prune_taxa(filt.DA$Feature[filt.DA$pval.adj < 0.05], x = filt.ra)
  
  # melt the data
  DAm <- psmelt(DA.sig)
  
  # Create plot
  pseudocount <- min(DAm$Abundance[DAm$Abundance != 0])
  p.dif <- ggplot(DAm, aes(x = Species, y = Abundance+pseudocount, fill = fct_rev(feed_treat))) + geom_boxplot() + scale_y_log10() + coord_flip() + scale_fill_manual(values = params$COL1, name = "Treatment", labels = c("Control","PFOS"), breaks = c("HF_CTRL","HF_PFOS")) + facet_grid(Phylum ~ . , scales = "free_y", space = "free") + theme_bw()
  p.dT <- p.dif + ggtitle(paste0("Differential abundance on ",LVL," level for ",MTRL," ",DAY," ",FEED))
  print(p.dT)
  
  suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",FEED,"_",PDI,"_",LVL,".pdf"), p.dT, device = "pdf", units = "mm", width = 350, height = 200, dpi = 300))
  suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",FEED,"_",PDI,"_",LVL,".png"), p.dT, device = "png", units = "mm", width = 350, height = 200, dpi = 300))
} else {
  if (exists("FEED") == TRUE) {
  message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for feed ",FEED,"."))
  } else if (exists("TREAT") == TRUE) {
  message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for treatment ",TREAT,"."))
  }
}

# clear the environment and release memory
rm(list = ls(all.names = TRUE))
invisible(gc())
4.1.4.2.2 FAMILY (NO SIGNIFICANCE)
params <- readRDS("R_objects/comp_params.RDS")

LVL <- "Family" #c("Species","Genus","Family")
MTRL <- "Feces" #c("Feces","Ileum","Cecum")
DAY <- "d16" #c("d0","d08","d12","d16","d20","d21")
FEED <- "HF" #c("LF","HF")
#TREAT <- "CTRL" #c("CTRL","PFOS")
PDI <- "feed_treat" #c("treatment","feed")
SUBNAME <- "Feces_d16" #c("Feces","Feces_LF","Feces_HF","Feces_CTRL","Feces_PFOS","Feces_LF_CTRL","Feces_LF_PFOS","Feces_HF_CTRL","Feces_HF_PFOS",             "Feces_d0","Feces_d08","Feces_d12","Feces_d16","Feces_d20","Feces_d21","Feces_d0d08","Feces_d08d21",             "Ileum","Ileum_LF","Ileum_HF","Cecum","Cecum_LF","Cecum_HF")
load(paste0("R_objects/Agglomerated_",SUBNAME,".RData"))

# Create sub-folder
  if (!file.exists(paste0("plots/differential_abundance/",SUBNAME))) dir.create(file.path(getwd(), paste0("plots/differential_abundance/",SUBNAME)))

# Filter samples (if needed)
# phy.fa <- subset_samples(phy.fa, material == MTRL)
if (exists("FEED") == TRUE) {
  phy.fa <- subset_samples(phy.fa, feed == FEED)
  print(paste0("Data contains type ",FEED," feed."))
} else {
  print("Data contains both feed LF and HF.")
}
if (exists("DAY") == TRUE) {
  phy.fa <- subset_samples(phy.fa, day %in% DAY)
  print(paste0("Data contains samples from day ",DAY,"."))
} else {
    print("Data contains all days.")
}
if (exists("TREAT") == TRUE) {
  phy.fa <- subset_samples(phy.fa, treatment == TREAT)
  print(paste0("Data contains only ",TREAT," treatment groups."))
} else {
  print("Data contains all treatment groups.")
}

# Filter data
filt <- preDA(data = phy.fa, min.reads = 20, min.samples = 4)

# Run the selected analysis
filt.DA <- DA.tta(filt, predictor = PDI)

# Evaluate the plot and summary table
table(filt.DA$pval.adj < 0.05)
filt.p5 <- filt.DA[filt.DA$pval.adj < 0.05,]
write.csv(filt.p5,paste0("plots/differential_abundance/",SUBNAME,"/stats_",MTRL,"_",DAY,"_",FEED,"_",PDI,"_",LVL,".csv"))

if (any(filt.p5$pval.adj > 0)) {
  # Create a subset of the samples
  filt.ra <- transform_sample_counts(filt, function(x) x/sum(x)*100)
  DA.sig <- prune_taxa(filt.DA$Feature[filt.DA$pval.adj < 0.05], x = filt.ra)
  
  # melt the data
  DAm <- psmelt(DA.sig)
  
  # Create plot
  pseudocount <- min(DAm$Abundance[DAm$Abundance != 0])
  p.dif <- ggplot(DAm, aes(x = Species, y = Abundance+pseudocount, fill = fct_rev(feed_treat))) + geom_boxplot() + scale_y_log10() + coord_flip() + scale_fill_manual(values = params$COL1, name = "Treatment", labels = c("Control","PFOS"), breaks = c("HF_CTRL","HF_PFOS")) + facet_grid(Phylum ~ . , scales = "free_y", space = "free") + theme_bw()
  p.dT <- p.dif + ggtitle(paste0("Differential abundance on ",LVL," level for ",MTRL," ",DAY," ",FEED))
  print(p.dT)
  
  suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",FEED,"_",PDI,"_",LVL,".pdf"), p.dT, device = "pdf", units = "mm", width = 350, height = 200, dpi = 300))
  suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",FEED,"_",PDI,"_",LVL,".png"), p.dT, device = "png", units = "mm", width = 350, height = 200, dpi = 300))
} else {
  if (exists("FEED") == TRUE) {
  message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for feed ",FEED,"."))
  } else if (exists("TREAT") == TRUE) {
  message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for treatment ",TREAT,"."))
  }
}

# clear the environment and release memory
rm(list = ls(all.names = TRUE))
invisible(gc())

4.1.4.3 Feed in CTRL

Here we investigate the differential abundance driven by feed (No fibre vs. Fibre) in the control treatment group (CTRL).

4.1.4.3.1 SPECIES
params <- readRDS("R_objects/comp_params.RDS")

LVL <- "Species" #c("Species","Genus","Family")
MTRL <- "Feces" #c("Feces","Ileum","Cecum")
DAY <- "d16" #c("d0","d08","d12","d16","d20","d21")
#FEED <- "LF" #c("LF","HF")
TREAT <- "CTRL" #c("CTRL","PFOS")
PDI <- "feed_treat" #c("treatment","feed")
SUBNAME <- "Feces_d16" #c("Feces","Feces_LF","Feces_HF","Feces_CTRL","Feces_PFOS","Feces_LF_CTRL","Feces_LF_PFOS","Feces_HF_CTRL","Feces_HF_PFOS",             "Feces_d0","Feces_d08","Feces_d12","Feces_d16","Feces_d20","Feces_d21","Feces_d0d08","Feces_d08d21",             "Ileum","Ileum_LF","Ileum_HF","Cecum","Cecum_LF","Cecum_HF")
load(paste0("R_objects/Agglomerated_",SUBNAME,".RData"))

# Create sub-folder
  if (!file.exists(paste0("plots/differential_abundance/",SUBNAME))) dir.create(file.path(getwd(), paste0("plots/differential_abundance/",SUBNAME)))

# Filter samples (if needed)
if (exists("FEED") == TRUE) {
  phy.sp <- subset_samples(phy.sp, feed == FEED)
  print(paste0("Data contains type ",FEED," feed."))
} else {
  print("Data contains both feed LF and HF.")
}
if (exists("DAY") == TRUE) {
  phy.sp <- subset_samples(phy.sp, day %in% DAY)
  print(paste0("Data contains samples from day ",DAY,"."))
} else {
    print("Data contains all days.")
}
if (exists("TREAT") == TRUE) {
  phy.sp <- subset_samples(phy.sp, treatment == TREAT)
  print(paste0("Data contains only ",TREAT," treatment groups."))
} else {
  print("Data contains all treatment groups.")
}

# Filter data
filt <- preDA(data = phy.sp, min.reads = 20, min.samples = 4)

# Run the selected analysis
filt.DA <- DA.ttc(filt, predictor = PDI)

# Evaluate the plot and summary table
table(filt.DA$pval.adj < 0.05)
filt.p5 <- filt.DA[filt.DA$pval.adj < 0.05,]
write.csv(filt.p5,paste0("plots/differential_abundance/",SUBNAME,"/stats_",MTRL,"_",DAY,"_",TREAT,"_",PDI,"_",LVL,".csv"))

if (any(filt.p5$pval.adj > 0)) {
  # Create a subset of the samples
  filt.ra <- transform_sample_counts(filt, function(x) x/sum(x)*100)
  DA.sig <- prune_taxa(filt.DA$Feature[filt.DA$pval.adj < 0.05], x = filt.ra)
  
  # melt the data
  DAm <- psmelt(DA.sig)
  
  # Create plot
  pseudocount <- min(DAm$Abundance[DAm$Abundance != 0])
  p.dif <- ggplot(DAm, aes(x = Species, y = Abundance+pseudocount, fill = fct_rev(feed_treat))) + geom_boxplot() + scale_y_log10() + coord_flip() + scale_fill_manual(values = params$COL1, name = "Feed", labels = c("HF","LF"), breaks = c("LF_CTRL","HF_CTRL")) + facet_grid(Phylum ~ . , scales = "free_y", space = "free") + theme_bw()
  p.dT <- p.dif + ggtitle(paste0("Differential abundance on ",LVL," level for ",MTRL," ",DAY," ",TREAT))
  print(p.dT)
  
  suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",TREAT,"_",PDI,"_",LVL,".pdf"), p.dT, device = "pdf", units = "mm", width = 350, height = 200, dpi = 300))
  suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",TREAT,"_",PDI,"_",LVL,".png"), p.dT, device = "png", units = "mm", width = 350, height = 200, dpi = 300))
} else {
  if (exists("FEED") == TRUE) {
  message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for feed ",FEED,"."))
  } else if (exists("TREAT") == TRUE) {
  message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for treatment ",TREAT,"."))
  }
}

# clear the environment and release memory
rm(list = ls(all.names = TRUE))
invisible(gc())
Figure
Figure
4.1.4.3.2 GENUS
params <- readRDS("R_objects/comp_params.RDS")

LVL <- "Genus" #c("Species","Genus","Family")
MTRL <- "Feces" #c("Feces","Ileum","Cecum")
DAY <- "d16" #c("d0","d08","d12","d16","d20","d21")
#FEED <- "LF" #c("LF","HF")
TREAT <- "CTRL" #c("CTRL","PFOS")
PDI <- "feed_treat" #c("treatment","feed")
SUBNAME <- "Feces_d16" #c("Feces","Feces_LF","Feces_HF","Feces_CTRL","Feces_PFOS","Feces_LF_CTRL","Feces_LF_PFOS","Feces_HF_CTRL","Feces_HF_PFOS",             "Feces_d0","Feces_d21","Feces_d12","Feces_d16","Feces_d20","Feces_d21","Feces_d0d08","Feces_d08d21",             "Ileum","Ileum_LF","Ileum_HF","Cecum","Cecum_LF","Cecum_HF")
load(paste0("R_objects/Agglomerated_",SUBNAME,".RData"))

# Create sub-folder
  if (!file.exists(paste0("plots/differential_abundance/",SUBNAME))) dir.create(file.path(getwd(), paste0("plots/differential_abundance/",SUBNAME)))

# Filter samples (if needed)
# phy.ge <- subset_samples(phy.ge, material == MTRL)
if (exists("FEED") == TRUE) {
  phy.ge <- subset_samples(phy.ge, feed == FEED)
  print(paste0("Data contains type ",FEED," feed."))
} else {
  print("Data contains both feed LF and HF.")
}
if (exists("DAY") == TRUE) {
  phy.ge <- subset_samples(phy.ge, day %in% DAY)
  print(paste0("Data contains samples from day ",DAY,"."))
} else {
    print("Data contains all days.")
}
if (exists("TREAT") == TRUE) {
  phy.ge <- subset_samples(phy.ge, treatment == TREAT)
  print(paste0("Data contains only ",TREAT," treatment groups."))
} else {
  print("Data contains all treatment groups.")
}

# Filter data
filt <- preDA(data = phy.ge, min.reads = 20, min.samples = 4)

# Run the selected analysis
filt.DA <- DA.ttc(filt, predictor = PDI)

# Evaluate the plot and summary table
table(filt.DA$pval.adj < 0.05)
filt.p5 <- filt.DA[filt.DA$pval.adj < 0.05,]
write.csv(filt.p5,paste0("plots/differential_abundance/",SUBNAME,"/stats_",MTRL,"_",DAY,"_",TREAT,"_",PDI,"_",LVL,".csv"))

if (any(filt.p5$pval.adj > 0)) {
  # Create a subset of the samples
  filt.ra <- transform_sample_counts(filt, function(x) x/sum(x)*100)
  DA.sig <- prune_taxa(filt.DA$Feature[filt.DA$pval.adj < 0.05], x = filt.ra)
  
  # melt the data
  DAm <- psmelt(DA.sig)
  
  # Create plot
  pseudocount <- min(DAm$Abundance[DAm$Abundance != 0])
  p.dif <- ggplot(DAm, aes(x = Genus, y = Abundance+pseudocount, fill = fct_rev(feed_treat))) + geom_boxplot() + scale_y_log10() + coord_flip() + scale_fill_manual(values = params$COL1, name = "Feed", labels = c("HF","LF"), breaks = c("LF_CTRL","HF_CTRL")) + facet_grid(Phylum ~ . , scales = "free_y", space = "free") + theme_bw()
  p.dT <- p.dif + ggtitle(paste0("Differential abundance on ",LVL," level for ",MTRL," ",DAY," ",TREAT))
  print(p.dT)
  
  suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",TREAT,"_",PDI,"_",LVL,".pdf"), p.dT, device = "pdf", units = "mm", width = 350, height = 200, dpi = 300))
  suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",TREAT,"_",PDI,"_",LVL,".png"), p.dT, device = "png", units = "mm", width = 350, height = 200, dpi = 300))
} else {
  if (exists("FEED") == TRUE) {
  message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for feed ",FEED,"."))
  } else if (exists("TREAT") == TRUE) {
  message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for treatment ",TREAT,"."))
  }
}

# clear the environment and release memory
rm(list = ls(all.names = TRUE))
invisible(gc())
Figure
Figure
4.1.4.3.3 FAMILY (NO SIGNIFICANCE)
params <- readRDS("R_objects/comp_params.RDS")

LVL <- "Family" #c("Species","Genus","Family")
MTRL <- "Feces" #c("Feces","Ileum","Cecum")
DAY <- "d16" #c("d0","d08","d12","d16","d20","d21")
#FEED <- "LF" #c("LF","HF")
TREAT <- "CTRL" #c("CTRL","PFOS")
PDI <- "feed_treat" #c("treatment","feed")
SUBNAME <- "Feces_d16" #c("Feces","Feces_LF","Feces_HF","Feces_CTRL","Feces_PFOS","Feces_LF_CTRL","Feces_LF_PFOS","Feces_HF_CTRL","Feces_HF_PFOS",             "Feces_d0","Feces_d08","Feces_d12","Feces_d16","Feces_d20","Feces_d21","Feces_d0d08","Feces_d08d21",             "Ileum","Ileum_LF","Ileum_HF","Cecum","Cecum_LF","Cecum_HF")
load(paste0("R_objects/Agglomerated_",SUBNAME,".RData"))

# Create sub-folder
  if (!file.exists(paste0("plots/differential_abundance/",SUBNAME))) dir.create(file.path(getwd(), paste0("plots/differential_abundance/",SUBNAME)))

# Filter samples (if needed)
# phy.fa <- subset_samples(phy.fa, material == MTRL)
if (exists("FEED") == TRUE) {
  phy.fa <- subset_samples(phy.fa, feed == FEED)
  print(paste0("Data contains type ",FEED," feed."))
} else {
  print("Data contains both feed LF and HF.")
}
if (exists("DAY") == TRUE) {
  phy.fa <- subset_samples(phy.fa, day %in% DAY)
  print(paste0("Data contains samples from day ",DAY,"."))
} else {
    print("Data contains all days.")
}
if (exists("TREAT") == TRUE) {
  phy.fa <- subset_samples(phy.fa, treatment == TREAT)
  print(paste0("Data contains only ",TREAT," treatment groups."))
} else {
  print("Data contains all treatment groups.")
}

# Filter data
filt <- preDA(data = phy.fa, min.reads = 20, min.samples = 4)

# Run the selected analysis
filt.DA <- DA.ttt(filt, predictor = PDI)

# Evaluate the plot and summary table
table(filt.DA$pval.adj < 0.05)
filt.p5 <- filt.DA[filt.DA$pval.adj < 0.05,]
write.csv(filt.p5,paste0("plots/differential_abundance/",SUBNAME,"/stats_",MTRL,"_",DAY,"_",TREAT,"_",PDI,"_",LVL,".csv"))

if (any(filt.p5$pval.adj > 0)) {
  # Create a subset of the samples
  filt.ra <- transform_sample_counts(filt, function(x) x/sum(x)*100)
  DA.sig <- prune_taxa(filt.DA$Feature[filt.DA$pval.adj < 0.05], x = filt.ra)
  
  # melt the data
  DAm <- psmelt(DA.sig)
  
  # Create plot
  pseudocount <- min(DAm$Abundance[DAm$Abundance != 0])
  p.dif <- ggplot(DAm, aes(x = Family, y = Abundance+pseudocount, fill = fct_rev(feed_treat))) + geom_boxplot() + scale_y_log10() + coord_flip() + scale_fill_manual(values = params$COL1, name = "Feed", labels = c("HF","LF"), breaks = c("LF_CTRL","HF_CTRL")) + facet_grid(Phylum ~ . , scales = "free_y", space = "free") + theme_bw()
  p.dT <- p.dif + ggtitle(paste0("Differential abundance on ",LVL," level for ",MTRL," ",DAY," ",TREAT))
  print(p.dT)
  
  suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",TREAT,"_",PDI,"_",LVL,".pdf"), p.dT, device = "pdf", units = "mm", width = 350, height = 200, dpi = 300))
  suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",TREAT,"_",PDI,"_",LVL,".png"), p.dT, device = "png", units = "mm", width = 350, height = 200, dpi = 300))
} else {
  if (exists("FEED") == TRUE) {
  message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for feed ",FEED,"."))
  } else if (exists("TREAT") == TRUE) {
  message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for treatment ",TREAT,"."))
  }
}

# clear the environment and release memory
rm(list = ls(all.names = TRUE))
invisible(gc())

4.1.4.4 Feed in PFOS

Here we investigate the differential abundance driven by feed (No fibre vs. Fibre) in the PFOS treatment group (PFOS).

4.1.4.4.1 SPECIES
params <- readRDS("R_objects/comp_params.RDS")

LVL <- "Species" #c("Species","Genus","Family")
MTRL <- "Feces" #c("Feces","Ileum","Cecum")
DAY <- "d16" #c("d0","d08","d12","d16","d20","d21")
#FEED <- "LF" #c("LF","HF")
TREAT <- "PFOS" #c("CTRL","PFOS")
PDI <- "feed_treat" #c("treatment","feed")
SUBNAME <- "Feces_d16" #c("Feces","Feces_LF","Feces_HF","Feces_CTRL","Feces_PFOS","Feces_LF_CTRL","Feces_LF_PFOS","Feces_HF_CTRL","Feces_HF_PFOS",             "Feces_d0","Feces_d08","Feces_d12","Feces_d16","Feces_d20","Feces_d21","Feces_d0d08","Feces_d08d21",             "Ileum","Ileum_LF","Ileum_HF","Cecum","Cecum_LF","Cecum_HF")
load(paste0("R_objects/Agglomerated_",SUBNAME,".RData"))

# Create sub-folder
  if (!file.exists(paste0("plots/differential_abundance/",SUBNAME))) dir.create(file.path(getwd(), paste0("plots/differential_abundance/",SUBNAME)))

# Filter samples (if needed)
if (exists("FEED") == TRUE) {
  phy.sp <- subset_samples(phy.sp, feed == FEED)
  print(paste0("Data contains type ",FEED," feed."))
} else {
  print("Data contains both feed LF and HF.")
}
if (exists("DAY") == TRUE) {
  phy.sp <- subset_samples(phy.sp, day %in% DAY)
  print(paste0("Data contains samples from day ",DAY,"."))
} else {
    print("Data contains all days.")
}
if (exists("TREAT") == TRUE) {
  phy.sp <- subset_samples(phy.sp, treatment == TREAT)
  print(paste0("Data contains only ",TREAT," treatment groups."))
} else {
  print("Data contains all treatment groups.")
}

# Filter data
filt <- preDA(data = phy.sp, min.reads = 20, min.samples = 4)

# Run the selected analysis
filt.DA <- DA.ttc(filt, predictor = PDI)

# Evaluate the plot and summary table
table(filt.DA$pval.adj < 0.05)
filt.p5 <- filt.DA[filt.DA$pval.adj < 0.05,]
write.csv(filt.p5,paste0("plots/differential_abundance/",SUBNAME,"/stats_",MTRL,"_",DAY,"_",TREAT,"_",PDI,"_",LVL,".csv"))

if (any(filt.p5$pval.adj > 0)) {
  # Create a subset of the samples
  filt.ra <- transform_sample_counts(filt, function(x) x/sum(x)*100)
  DA.sig <- prune_taxa(filt.DA$Feature[filt.DA$pval.adj < 0.05], x = filt.ra)
  
  # melt the data
  DAm <- psmelt(DA.sig)
  
  # Create plot
  pseudocount <- min(DAm$Abundance[DAm$Abundance != 0])
  p.dif <- ggplot(DAm, aes(x = Species, y = Abundance+pseudocount, fill = fct_rev(feed_treat))) + geom_boxplot() + scale_y_log10() + coord_flip() + scale_fill_manual(values = params$COL1, name = "Feed", labels = c("HF","LF"), breaks = c("LF_PFOS","HF_PFOS")) + facet_grid(Phylum ~ . , scales = "free_y", space = "free") + theme_bw()
  p.dT <- p.dif + ggtitle(paste0("Differential abundance on ",LVL," level for ",MTRL," ",DAY," ",TREAT))
  print(p.dT)
  
  suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",TREAT,"_",PDI,"_",LVL,".pdf"), p.dT, device = "pdf", units = "mm", width = 350, height = 200, dpi = 300))
  suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",TREAT,"_",PDI,"_",LVL,".png"), p.dT, device = "png", units = "mm", width = 350, height = 200, dpi = 300))
} else {
  if (exists("FEED") == TRUE) {
  message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for feed ",FEED,"."))
  } else if (exists("TREAT") == TRUE) {
  message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for treatment ",TREAT,"."))
  }
}

# clear the environment and release memory
rm(list = ls(all.names = TRUE))
invisible(gc())
Figure
Figure
4.1.4.4.2 GENUS
params <- readRDS("R_objects/comp_params.RDS")

LVL <- "Genus" #c("Species","Genus","Family")
MTRL <- "Feces" #c("Feces","Ileum","Cecum")
DAY <- "d16" #c("d0","d08","d12","d16","d20","d21")
#FEED <- "LF" #c("LF","HF")
TREAT <- "PFOS" #c("CTRL","PFOS")
PDI <- "feed_treat" #c("treatment","feed")
SUBNAME <- "Feces_d16" #c("Feces","Feces_LF","Feces_HF","Feces_CTRL","Feces_PFOS","Feces_LF_CTRL","Feces_LF_PFOS","Feces_HF_CTRL","Feces_HF_PFOS",             "Feces_d0","Feces_d08","Feces_d12","Feces_d16","Feces_d20","Feces_d21","Feces_d0d08","Feces_d08d21",             "Ileum","Ileum_LF","Ileum_HF","Cecum","Cecum_LF","Cecum_HF")
load(paste0("R_objects/Agglomerated_",SUBNAME,".RData"))

# Create sub-folder
  if (!file.exists(paste0("plots/differential_abundance/",SUBNAME))) dir.create(file.path(getwd(), paste0("plots/differential_abundance/",SUBNAME)))

# Filter samples (if needed)
# phy.ge <- subset_samples(phy.ge, material == MTRL)
if (exists("FEED") == TRUE) {
  phy.ge <- subset_samples(phy.ge, feed == FEED)
  print(paste0("Data contains type ",FEED," feed."))
} else {
  print("Data contains both feed LF and HF.")
}
if (exists("DAY") == TRUE) {
  phy.ge <- subset_samples(phy.ge, day %in% DAY)
  print(paste0("Data contains samples from day ",DAY,"."))
} else {
    print("Data contains all days.")
}
if (exists("TREAT") == TRUE) {
  phy.ge <- subset_samples(phy.ge, treatment == TREAT)
  print(paste0("Data contains only ",TREAT," treatment groups."))
} else {
  print("Data contains all treatment groups.")
}

# Filter data
filt <- preDA(data = phy.ge, min.reads = 20, min.samples = 4)

# Run the selected analysis
filt.DA <- DA.ttc(filt, predictor = PDI)

# Evaluate the plot and summary table
table(filt.DA$pval.adj < 0.05)
filt.p5 <- filt.DA[filt.DA$pval.adj < 0.05,]
write.csv(filt.p5,paste0("plots/differential_abundance/",SUBNAME,"/stats_",MTRL,"_",DAY,"_",TREAT,"_",PDI,"_",LVL,".csv"))

if (any(filt.p5$pval.adj > 0)) {
  # Create a subset of the samples
  filt.ra <- transform_sample_counts(filt, function(x) x/sum(x)*100)
  DA.sig <- prune_taxa(filt.DA$Feature[filt.DA$pval.adj < 0.05], x = filt.ra)
  
  # melt the data
  DAm <- psmelt(DA.sig)
  
  # Create plot
  pseudocount <- min(DAm$Abundance[DAm$Abundance != 0])
  p.dif <- ggplot(DAm, aes(x = Genus, y = Abundance+pseudocount, fill = fct_rev(feed_treat))) + geom_boxplot() + scale_y_log10() + coord_flip() + scale_fill_manual(values = params$COL1, name = "Feed", labels = c("HF","LF"), breaks = c("LF_PFOS","HF_PFOS")) + facet_grid(Phylum ~ . , scales = "free_y", space = "free") + theme_bw()
  p.dT <- p.dif + ggtitle(paste0("Differential abundance on ",LVL," level for ",MTRL," ",DAY," ",TREAT))
  print(p.dT)
  
  suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",TREAT,"_",PDI,"_",LVL,".pdf"), p.dT, device = "pdf", units = "mm", width = 350, height = 200, dpi = 300))
  suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",TREAT,"_",PDI,"_",LVL,".png"), p.dT, device = "png", units = "mm", width = 350, height = 200, dpi = 300))
} else {
  if (exists("FEED") == TRUE) {
  message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for feed ",FEED,"."))
  } else if (exists("TREAT") == TRUE) {
  message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for treatment ",TREAT,"."))
  }
}

# clear the environment and release memory
rm(list = ls(all.names = TRUE))
invisible(gc())
Figure
Figure
4.1.4.4.3 FAMILY
params <- readRDS("R_objects/comp_params.RDS")

LVL <- "Family" #c("Species","Genus","Family")
MTRL <- "Feces" #c("Feces","Ileum","Cecum")
DAY <- "d16" #c("d0","d08","d12","d16","d20","d21")
#FEED <- "LF" #c("LF","HF")
TREAT <- "PFOS" #c("CTRL","PFOS")
PDI <- "feed_treat" #c("treatment","feed")
SUBNAME <- "Feces_d16" #c("Feces","Feces_LF","Feces_HF","Feces_CTRL","Feces_PFOS","Feces_LF_CTRL","Feces_LF_PFOS","Feces_HF_CTRL","Feces_HF_PFOS",             "Feces_d0","Feces_d08","Feces_d12","Feces_d16","Feces_d20","Feces_d21","Feces_d0d08","Feces_d08d21",             "Ileum","Ileum_LF","Ileum_HF","Cecum","Cecum_LF","Cecum_HF")
load(paste0("R_objects/Agglomerated_",SUBNAME,".RData"))

# Create sub-folder
  if (!file.exists(paste0("plots/differential_abundance/",SUBNAME))) dir.create(file.path(getwd(), paste0("plots/differential_abundance/",SUBNAME)))

# Filter samples (if needed)
# phy.fa <- subset_samples(phy.fa, material == MTRL)
if (exists("FEED") == TRUE) {
  phy.fa <- subset_samples(phy.fa, feed == FEED)
  print(paste0("Data contains type ",FEED," feed."))
} else {
  print("Data contains both feed LF and HF.")
}
if (exists("DAY") == TRUE) {
  phy.fa <- subset_samples(phy.fa, day %in% DAY)
  print(paste0("Data contains samples from day ",DAY,"."))
} else {
    print("Data contains all days.")
}
if (exists("TREAT") == TRUE) {
  phy.fa <- subset_samples(phy.fa, treatment == TREAT)
  print(paste0("Data contains only ",TREAT," treatment groups."))
} else {
  print("Data contains all treatment groups.")
}

# Filter data
filt <- preDA(data = phy.fa, min.reads = 20, min.samples = 4)

# Run the selected analysis
filt.DA <- DA.ltt(filt, predictor = PDI)

# Evaluate the plot and summary table
table(filt.DA$pval.adj < 0.05)
filt.p5 <- filt.DA[filt.DA$pval.adj < 0.05,]
write.csv(filt.p5,paste0("plots/differential_abundance/",SUBNAME,"/stats_",MTRL,"_",DAY,"_",TREAT,"_",PDI,"_",LVL,".csv"))

if (any(filt.p5$pval.adj > 0)) {
  # Create a subset of the samples
  filt.ra <- transform_sample_counts(filt, function(x) x/sum(x)*100)
  DA.sig <- prune_taxa(filt.DA$Feature[filt.DA$pval.adj < 0.05], x = filt.ra)
  
  # melt the data
  DAm <- psmelt(DA.sig)
  
  # Create plot
  pseudocount <- min(DAm$Abundance[DAm$Abundance != 0])
  p.dif <- ggplot(DAm, aes(x = Family, y = Abundance+pseudocount, fill = fct_rev(feed_treat))) + geom_boxplot() + scale_y_log10() + coord_flip() + scale_fill_manual(values = params$COL1, name = "Feed", labels = c("HF","LF"), breaks = c("LF_PFOS","HF_PFOS")) + facet_grid(Phylum ~ . , scales = "free_y", space = "free") + theme_bw()
  p.dT <- p.dif + ggtitle(paste0("Differential abundance on ",LVL," level for ",MTRL," ",DAY," ",TREAT))
  print(p.dT)
  
  suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",TREAT,"_",PDI,"_",LVL,".pdf"), p.dT, device = "pdf", units = "mm", width = 350, height = 200, dpi = 300))
  suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",TREAT,"_",PDI,"_",LVL,".png"), p.dT, device = "png", units = "mm", width = 350, height = 200, dpi = 300))
} else {
  if (exists("FEED") == TRUE) {
  message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for feed ",FEED,"."))
  } else if (exists("TREAT") == TRUE) {
  message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for treatment ",TREAT,"."))
  }
}

# clear the environment and release memory
rm(list = ls(all.names = TRUE))
invisible(gc())
Figure
Figure

4.1.4.5 CONCLUSIONS

Significant differential abundances were observed between diets but not between treatment groups within each feed group on Day 16.

4.1.5 DAY 21

Samples at day 8 are taken 24 hours after last gavage of corn oil (+/- PFOS) and prior to dissection of half the animals.

4.1.5.1 Treatment in LF

Here we take a look at the differential abundance driven by PFOS treatment (CTRL vs. PFOS) in LF.

4.1.5.1.1 SPECIES (NO SIGNIFICANCE)
params <- readRDS("R_objects/comp_params.RDS")

LVL <- "Species" #c("Species","Genus","Family")
MTRL <- "Feces" #c("Feces","Ileum","Cecum")
DAY <- "d21" #c("d0","d08","d12","d16","d20","d21")
FEED <- "LF" #c("LF","HF")
#TREAT <- "CTRL" #c("CTRL","PFOS")
PDI <- "feed_treat" #c("treatment","feed")
SUBNAME <- "Feces_d21" #c("Feces","Feces_LF","Feces_HF","Feces_CTRL","Feces_PFOS","Feces_LF_CTRL","Feces_LF_PFOS","Feces_HF_CTRL","Feces_HF_PFOS",             "Feces_d0","Feces_d08","Feces_d12","Feces_d16","Feces_d20","Feces_d21","Feces_d0d08","Feces_d08d21",             "Ileum","Ileum_LF","Ileum_HF","Cecum","Cecum_LF","Cecum_HF")
load(paste0("R_objects/Agglomerated_",SUBNAME,".RData"))

# Create sub-folder
  if (!file.exists(paste0("plots/differential_abundance/",SUBNAME))) dir.create(file.path(getwd(), paste0("plots/differential_abundance/",SUBNAME)))

# Filter samples (if needed)
if (exists("FEED") == TRUE) {
  phy.sp <- subset_samples(phy.sp, feed == FEED)
  print(paste0("Data contains type ",FEED," feed."))
} else {
  print("Data contains both feed LF and HF.")
}
if (exists("DAY") == TRUE) {
  phy.sp <- subset_samples(phy.sp, day %in% DAY)
  print(paste0("Data contains samples from day ",DAY,"."))
} else {
    print("Data contains all days.")
}
if (exists("TREAT") == TRUE) {
  phy.sp <- subset_samples(phy.sp, treatment == TREAT)
  print(paste0("Data contains only ",TREAT," treatment groups."))
} else {
  print("Data contains all treatment groups.")
}

# Filter data
filt <- preDA(data = phy.sp, min.reads = 20, min.samples = 4)

# Test best method 
filt.test <- testDA(filt, predictor = PDI, effectSize = 10)

# Evaluate the plot and summary table
sum.fil <- summary(filt.test)
write.csv(sum.fil, file = paste0("plots/differential_abundance/",SUBNAME,"/filter_summary_",MTRL,"_",DAY,"_",FEED,"_",PDI,"_",LVL,".csv"))
p.fil <- plot(filt.test)
p.fil
pdf(paste0("plots/differential_abundance/",SUBNAME,"/filter_plot_",MTRL,"_",DAY,"_",FEED,"_",PDI,"_",LVL,".pdf"))
    p.fil
    dev.off()

RUN DAtest

Having now identified t-test - CLR (ttc) as the optimal test I will use it to calculate differential abundance

# Run the selected analysis
filt.DA <- DA.ttc(filt, predictor = PDI)

# Evaluate the plot and summary table
table(filt.DA$pval.adj < 0.05)
filt.p5 <- filt.DA[filt.DA$pval.adj < 0.05,]
write.csv(filt.p5,paste0("plots/differential_abundance/",SUBNAME,"/stats_",MTRL,"_",DAY,"_",FEED,"_",PDI,"_",LVL,".csv"))

if (any(filt.p5$pval.adj > 0)) {
  # Create a subset of the samples
  filt.ra <- transform_sample_counts(filt, function(x) x/sum(x)*100)
  DA.sig <- prune_taxa(filt.DA$Feature[filt.DA$pval.adj < 0.05], x = filt.ra)
  
  # melt the data
  DAm <- psmelt(DA.sig)
  
  # Create plot
  pseudocount <- min(DAm$Abundance[DAm$Abundance != 0])
  p.dif <- ggplot(DAm, aes(x = Species, y = Abundance+pseudocount, fill = fct_rev(feed_treat))) + geom_boxplot() + scale_y_log10() + coord_flip() + scale_fill_manual(values = params$COL1, name = "Treatment", labels = c("Control","PFOS"), breaks = c("LF_CTRL","LF_PFOS")) + facet_grid(Phylum ~ . , scales = "free_y", space = "free") + theme_bw()
  p.dT <- p.dif + ggtitle(paste0("Differential abundance on ",LVL," level for ",MTRL," ",DAY," ",FEED))
  print(p.dT)
  
  suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",FEED,"_",PDI,"_",LVL,".pdf"), p.dT, device = "pdf", units = "mm", width = 350, height = 200, dpi = 300))
  suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",FEED,"_",PDI,"_",LVL,".png"), p.dT, device = "png", units = "mm", width = 350, height = 200, dpi = 300))
} else {
  if (exists("FEED") == TRUE) {
  message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for feed ",FEED,"."))
  } else if (exists("TREAT") == TRUE) {
  message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for treatment ",TREAT,"."))
  }
}

# clear the environment and release memory
rm(list = ls(all.names = TRUE))
invisible(gc())
4.1.5.1.2 GENUS (NO SIGNIFICANCE)
params <- readRDS("R_objects/comp_params.RDS")

LVL <- "Genus" #c("Species","Genus","Family")
MTRL <- "Feces" #c("Feces","Ileum","Cecum")
DAY <- "d21" #c("d0","d08","d12","d16","d20","d21")
FEED <- "LF" #c("LF","HF")
#TREAT <- "CTRL" #c("CTRL","PFOS")
PDI <- "feed_treat" #c("treatment","feed")
SUBNAME <- "Feces_d21" #c("Feces","Feces_LF","Feces_HF","Feces_CTRL","Feces_PFOS","Feces_LF_CTRL","Feces_LF_PFOS","Feces_HF_CTRL","Feces_HF_PFOS",             "Feces_d0","Feces_d08","Feces_d12","Feces_d16","Feces_d20","Feces_d21","Feces_d0d08","Feces_d08d21",             "Ileum","Ileum_LF","Ileum_HF","Cecum","Cecum_LF","Cecum_HF")
load(paste0("R_objects/Agglomerated_",SUBNAME,".RData"))

# Create sub-folder
  if (!file.exists(paste0("plots/differential_abundance/",SUBNAME))) dir.create(file.path(getwd(), paste0("plots/differential_abundance/",SUBNAME)))

# Filter samples (if needed)
# phy.ge <- subset_samples(phy.ge, material == MTRL)
if (exists("FEED") == TRUE) {
  phy.ge <- subset_samples(phy.ge, feed == FEED)
  print(paste0("Data contains type ",FEED," feed."))
} else {
  print("Data contains both feed LF and HF.")
}
if (exists("DAY") == TRUE) {
  phy.ge <- subset_samples(phy.ge, day %in% DAY)
  print(paste0("Data contains samples from day ",DAY,"."))
} else {
    print("Data contains all days.")
}
if (exists("TREAT") == TRUE) {
  phy.ge <- subset_samples(phy.ge, treatment == TREAT)
  print(paste0("Data contains only ",TREAT," treatment groups."))
} else {
  print("Data contains all treatment groups.")
}

# Filter data
filt <- preDA(data = phy.ge, min.reads = 20, min.samples = 4)

# Run the selected analysis
filt.DA <- DA.ttr(filt, predictor = PDI)

# Evaluate the plot and summary table
table(filt.DA$pval.adj < 0.05)
filt.p5 <- filt.DA[filt.DA$pval.adj < 0.05,]
write.csv(filt.p5,paste0("plots/differential_abundance/",SUBNAME,"/stats_",MTRL,"_",DAY,"_",FEED,"_",PDI,"_",LVL,".csv"))

if (any(filt.p5$pval.adj > 0)) {
  # Create a subset of the samples
  filt.ra <- transform_sample_counts(filt, function(x) x/sum(x)*100)
  DA.sig <- prune_taxa(filt.DA$Feature[filt.DA$pval.adj < 0.05], x = filt.ra)
  
  # melt the data
  DAm <- psmelt(DA.sig)
  
  # Create plot
  pseudocount <- min(DAm$Abundance[DAm$Abundance != 0])
  p.dif <- ggplot(DAm, aes(x = Genus, y = Abundance+pseudocount, fill = fct_rev(feed_treat))) + geom_boxplot() + scale_y_log10() + coord_flip() + scale_fill_manual(values = params$COL1, name = "Treatment", labels = c("Control","PFOS"), breaks = c("LF_CTRL","LF_PFOS")) + facet_grid(Phylum ~ . , scales = "free_y", space = "free") + theme_bw()
  p.dT <- p.dif + ggtitle(paste0("Differential abundance on ",LVL," level for ",MTRL," ",DAY," ",FEED))
  print(p.dT)
  
  suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",FEED,"_",PDI,"_",LVL,".pdf"), p.dT, device = "pdf", units = "mm", width = 350, height = 200, dpi = 300))
  suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",FEED,"_",PDI,"_",LVL,".png"), p.dT, device = "png", units = "mm", width = 350, height = 200, dpi = 300))
} else {
  if (exists("FEED") == TRUE) {
  message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for feed ",FEED,"."))
  } else if (exists("TREAT") == TRUE) {
  message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for treatment ",TREAT,"."))
  }
}

# clear the environment and release memory
rm(list = ls(all.names = TRUE))
invisible(gc())
4.1.5.1.3 FAMILY (NO SIGNIFICANCE)
params <- readRDS("R_objects/comp_params.RDS")

LVL <- "Family" #c("Species","Genus","Family")
MTRL <- "Feces" #c("Feces","Ileum","Cecum")
DAY <- "d21" #c("d0","d08","d12","d16","d20","d21")
FEED <- "LF" #c("LF","HF")
#TREAT <- "CTRL" #c("CTRL","PFOS")
PDI <- "feed_treat" #c("treatment","feed")
SUBNAME <- "Feces_d21" #c("Feces","Feces_LF","Feces_HF","Feces_CTRL","Feces_PFOS","Feces_LF_CTRL","Feces_LF_PFOS","Feces_HF_CTRL","Feces_HF_PFOS",             "Feces_d0","Feces_d08","Feces_d12","Feces_d16","Feces_d20","Feces_d21","Feces_d0d08","Feces_d08d21",             "Ileum","Ileum_LF","Ileum_HF","Cecum","Cecum_LF","Cecum_HF")
load(paste0("R_objects/Agglomerated_",SUBNAME,".RData"))

# Create sub-folder
  if (!file.exists(paste0("plots/differential_abundance/",SUBNAME))) dir.create(file.path(getwd(), paste0("plots/differential_abundance/",SUBNAME)))

# Filter samples (if needed)
# phy.fa <- subset_samples(phy.fa, material == MTRL)
if (exists("FEED") == TRUE) {
  phy.fa <- subset_samples(phy.fa, feed == FEED)
  print(paste0("Data contains type ",FEED," feed."))
} else {
  print("Data contains both feed LF and HF.")
}
if (exists("DAY") == TRUE) {
  phy.fa <- subset_samples(phy.fa, day %in% DAY)
  print(paste0("Data contains samples from day ",DAY,"."))
} else {
    print("Data contains all days.")
}
if (exists("TREAT") == TRUE) {
  phy.fa <- subset_samples(phy.fa, treatment == TREAT)
  print(paste0("Data contains only ",TREAT," treatment groups."))
} else {
  print("Data contains all treatment groups.")
}

# Filter data
filt <- preDA(data = phy.fa, min.reads = 20, min.samples = 4)

# Run the selected analysis
filt.DA <- DA.tta(filt, predictor = PDI)

# Evaluate the plot and summary table
table(filt.DA$pval.adj < 0.05)
filt.p5 <- filt.DA[filt.DA$pval.adj < 0.05,]
write.csv(filt.p5,paste0("plots/differential_abundance/",SUBNAME,"/stats_",MTRL,"_",DAY,"_",FEED,"_",PDI,"_",LVL,".csv"))

if (any(filt.p5$pval.adj > 0)) {
  # Create a subset of the samples
  filt.ra <- transform_sample_counts(filt, function(x) x/sum(x)*100)
  DA.sig <- prune_taxa(filt.DA$Feature[filt.DA$pval.adj < 0.05], x = filt.ra)
  
  # melt the data
  DAm <- psmelt(DA.sig)
  
  # Create plot
  pseudocount <- min(DAm$Abundance[DAm$Abundance != 0])
  p.dif <- ggplot(DAm, aes(x = Family, y = Abundance+pseudocount, fill = fct_rev(feed_treat))) + geom_boxplot() + scale_y_log10() + coord_flip() + scale_fill_manual(values = params$COL1, name = "Treatment", labels = c("Control","PFOS"), breaks = c("LF_CTRL","LF_PFOS")) + facet_grid(Phylum ~ . , scales = "free_y", space = "free") + theme_bw()
  p.dT <- p.dif + ggtitle(paste0("Differential abundance on ",LVL," level for ",MTRL," ",DAY," ",FEED))
  print(p.dT)
  
  suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",FEED,"_",PDI,"_",LVL,".pdf"), p.dT, device = "pdf", units = "mm", width = 350, height = 200, dpi = 300))
  suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",FEED,"_",PDI,"_",LVL,".png"), p.dT, device = "png", units = "mm", width = 350, height = 200, dpi = 300))
} else {
  if (exists("FEED") == TRUE) {
  message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for feed ",FEED,"."))
  } else if (exists("TREAT") == TRUE) {
  message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for treatment ",TREAT,"."))
  }
}

# clear the environment and release memory
rm(list = ls(all.names = TRUE))
invisible(gc())

4.1.5.2 Treatment in HF

Here we take a look at the differential abundance driven by PFOS treatment (CTRL vs. PFOS) in HF. ##### SPECIES (NO SIGNIFICANCE)

params <- readRDS("R_objects/comp_params.RDS")

LVL <- "Species" #c("Species","Genus","Family")
MTRL <- "Feces" #c("Feces","Ileum","Cecum")
DAY <- "d21" #c("d0","d08","d12","d16","d20","d21")
FEED <- "HF" #c("LF","HF")
#TREAT <- "CTRL" #c("CTRL","PFOS")
PDI <- "feed_treat" #c("treatment","feed")
SUBNAME <- "Feces_d21" #c("Feces","Feces_LF","Feces_HF","Feces_CTRL","Feces_PFOS","Feces_LF_CTRL","Feces_LF_PFOS","Feces_HF_CTRL","Feces_HF_PFOS",             "Feces_d0","Feces_d08","Feces_d12","Feces_d16","Feces_d20","Feces_d21","Feces_d0d08","Feces_d08d21",             "Ileum","Ileum_LF","Ileum_HF","Cecum","Cecum_LF","Cecum_HF")
load(paste0("R_objects/Agglomerated_",SUBNAME,".RData"))

# Create sub-folder
  if (!file.exists(paste0("plots/differential_abundance/",SUBNAME))) dir.create(file.path(getwd(), paste0("plots/differential_abundance/",SUBNAME)))

# Filter samples (if needed)
if (exists("FEED") == TRUE) {
  phy.sp <- subset_samples(phy.sp, feed == FEED)
  print(paste0("Data contains type ",FEED," feed."))
} else {
  print("Data contains both feed LF and HF.")
}
if (exists("DAY") == TRUE) {
  phy.sp <- subset_samples(phy.sp, day %in% DAY)
  print(paste0("Data contains samples from day ",DAY,"."))
} else {
    print("Data contains all days.")
}
if (exists("TREAT") == TRUE) {
  phy.sp <- subset_samples(phy.sp, treatment == TREAT)
  print(paste0("Data contains only ",TREAT," treatment groups."))
} else {
  print("Data contains all treatment groups.")
}

# Filter data
filt <- preDA(data = phy.sp, min.reads = 20, min.samples = 4)

# Run the selected analysis
filt.DA <- DA.ttc(filt, predictor = PDI)

# Evaluate the plot and summary table
table(filt.DA$pval.adj < 0.05)
filt.p5 <- filt.DA[filt.DA$pval.adj < 0.05,]
write.csv(filt.p5,paste0("plots/differential_abundance/",SUBNAME,"/stats_",MTRL,"_",DAY,"_",FEED,"_",PDI,"_",LVL,".csv"))

if (any(filt.p5$pval.adj > 0)) {
  # Create a subset of the samples
  filt.ra <- transform_sample_counts(filt, function(x) x/sum(x)*100)
  DA.sig <- prune_taxa(filt.DA$Feature[filt.DA$pval.adj < 0.05], x = filt.ra)
  
  # melt the data
  DAm <- psmelt(DA.sig)
  
  # Create plot
  pseudocount <- min(DAm$Abundance[DAm$Abundance != 0])
  p.dif <- ggplot(DAm, aes(x = Species, y = Abundance+pseudocount, fill = fct_rev(feed_treat))) + geom_boxplot() + scale_y_log10() + coord_flip() + scale_fill_manual(values = params$COL1, name = "Treatment", labels = c("Control","PFOS"), breaks = c("HF_CTRL","HF_PFOS")) + facet_grid(Phylum ~ . , scales = "free_y", space = "free") + theme_bw()
  p.dT <- p.dif + ggtitle(paste0("Differential abundance on ",LVL," level for ",MTRL," ",DAY," ",FEED))
  print(p.dT)
  
  suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",FEED,"_",PDI,"_",LVL,".pdf"), p.dT, device = "pdf", units = "mm", width = 350, height = 200, dpi = 300))
  suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",FEED,"_",PDI,"_",LVL,".png"), p.dT, device = "png", units = "mm", width = 350, height = 200, dpi = 300))
} else {
  if (exists("FEED") == TRUE) {
  message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for feed ",FEED,"."))
  } else if (exists("TREAT") == TRUE) {
  message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for treatment ",TREAT,"."))
  }
}

# clear the environment and release memory
rm(list = ls(all.names = TRUE))
invisible(gc())
4.1.5.2.1 GENUS (NO SIGNIFICANCE)
params <- readRDS("R_objects/comp_params.RDS")

LVL <- "Genus" #c("Species","Genus","Family")
MTRL <- "Feces" #c("Feces","Ileum","Cecum")
DAY <- "d21" #c("d0","d08","d12","d16","d20","d21")
FEED <- "HF" #c("LF","HF")
#TREAT <- "CTRL" #c("CTRL","PFOS")
PDI <- "feed_treat" #c("treatment","feed")
SUBNAME <- "Feces_d21" #c("Feces","Feces_LF","Feces_HF","Feces_CTRL","Feces_PFOS","Feces_LF_CTRL","Feces_LF_PFOS","Feces_HF_CTRL","Feces_HF_PFOS",             "Feces_d0","Feces_d08","Feces_d12","Feces_d16","Feces_d20","Feces_d21","Feces_d0d08","Feces_d08d21",             "Ileum","Ileum_LF","Ileum_HF","Cecum","Cecum_LF","Cecum_HF")
load(paste0("R_objects/Agglomerated_",SUBNAME,".RData"))

# Create sub-folder
  if (!file.exists(paste0("plots/differential_abundance/",SUBNAME))) dir.create(file.path(getwd(), paste0("plots/differential_abundance/",SUBNAME)))

# Filter samples (if needed)
# phy.ge <- subset_samples(phy.ge, material == MTRL)
if (exists("FEED") == TRUE) {
  phy.ge <- subset_samples(phy.ge, feed == FEED)
  print(paste0("Data contains type ",FEED," feed."))
} else {
  print("Data contains both feed LF and HF.")
}
if (exists("DAY") == TRUE) {
  phy.ge <- subset_samples(phy.ge, day %in% DAY)
  print(paste0("Data contains samples from day ",DAY,"."))
} else {
    print("Data contains all days.")
}
if (exists("TREAT") == TRUE) {
  phy.ge <- subset_samples(phy.ge, treatment == TREAT)
  print(paste0("Data contains only ",TREAT," treatment groups."))
} else {
  print("Data contains all treatment groups.")
}

# Filter data
filt <- preDA(data = phy.ge, min.reads = 20, min.samples = 4)

# Run the selected analysis
filt.DA <- DA.ttc(filt, predictor = PDI)

# Evaluate the plot and summary table
table(filt.DA$pval.adj < 0.05)
filt.p5 <- filt.DA[filt.DA$pval.adj < 0.05,]
write.csv(filt.p5,paste0("plots/differential_abundance/",SUBNAME,"/stats_",MTRL,"_",DAY,"_",FEED,"_",PDI,"_",LVL,".csv"))

if (any(filt.p5$pval.adj > 0)) {
  # Create a subset of the samples
  filt.ra <- transform_sample_counts(filt, function(x) x/sum(x)*100)
  DA.sig <- prune_taxa(filt.DA$Feature[filt.DA$pval.adj < 0.05], x = filt.ra)
  
  # melt the data
  DAm <- psmelt(DA.sig)
  
  # Create plot
  pseudocount <- min(DAm$Abundance[DAm$Abundance != 0])
  p.dif <- ggplot(DAm, aes(x = Species, y = Abundance+pseudocount, fill = fct_rev(feed_treat))) + geom_boxplot() + scale_y_log10() + coord_flip() + scale_fill_manual(values = params$COL1, name = "Treatment", labels = c("Control","PFOS"), breaks = c("HF_CTRL","HF_PFOS")) + facet_grid(Phylum ~ . , scales = "free_y", space = "free") + theme_bw()
  p.dT <- p.dif + ggtitle(paste0("Differential abundance on ",LVL," level for ",MTRL," ",DAY," ",FEED))
  print(p.dT)
  
  suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",FEED,"_",PDI,"_",LVL,".pdf"), p.dT, device = "pdf", units = "mm", width = 350, height = 200, dpi = 300))
  suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",FEED,"_",PDI,"_",LVL,".png"), p.dT, device = "png", units = "mm", width = 350, height = 200, dpi = 300))
} else {
  if (exists("FEED") == TRUE) {
  message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for feed ",FEED,"."))
  } else if (exists("TREAT") == TRUE) {
  message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for treatment ",TREAT,"."))
  }
}

# clear the environment and release memory
rm(list = ls(all.names = TRUE))
invisible(gc())
4.1.5.2.2 FAMILY (NO SIGNIFICANCE)
params <- readRDS("R_objects/comp_params.RDS")

LVL <- "Family" #c("Species","Genus","Family")
MTRL <- "Feces" #c("Feces","Ileum","Cecum")
DAY <- "d21" #c("d0","d08","d12","d16","d20","d21")
FEED <- "HF" #c("LF","HF")
#TREAT <- "CTRL" #c("CTRL","PFOS")
PDI <- "feed_treat" #c("treatment","feed")
SUBNAME <- "Feces_d21" #c("Feces","Feces_LF","Feces_HF","Feces_CTRL","Feces_PFOS","Feces_LF_CTRL","Feces_LF_PFOS","Feces_HF_CTRL","Feces_HF_PFOS",             "Feces_d0","Feces_d08","Feces_d12","Feces_d16","Feces_d20","Feces_d21","Feces_d0d08","Feces_d08d21",             "Ileum","Ileum_LF","Ileum_HF","Cecum","Cecum_LF","Cecum_HF")
load(paste0("R_objects/Agglomerated_",SUBNAME,".RData"))

# Create sub-folder
  if (!file.exists(paste0("plots/differential_abundance/",SUBNAME))) dir.create(file.path(getwd(), paste0("plots/differential_abundance/",SUBNAME)))

# Filter samples (if needed)
# phy.fa <- subset_samples(phy.fa, material == MTRL)
if (exists("FEED") == TRUE) {
  phy.fa <- subset_samples(phy.fa, feed == FEED)
  print(paste0("Data contains type ",FEED," feed."))
} else {
  print("Data contains both feed LF and HF.")
}
if (exists("DAY") == TRUE) {
  phy.fa <- subset_samples(phy.fa, day %in% DAY)
  print(paste0("Data contains samples from day ",DAY,"."))
} else {
    print("Data contains all days.")
}
if (exists("TREAT") == TRUE) {
  phy.fa <- subset_samples(phy.fa, treatment == TREAT)
  print(paste0("Data contains only ",TREAT," treatment groups."))
} else {
  print("Data contains all treatment groups.")
}

# Filter data
filt <- preDA(data = phy.fa, min.reads = 20, min.samples = 4)

# Run the selected analysis
filt.DA <- DA.tta(filt, predictor = PDI)

# Evaluate the plot and summary table
table(filt.DA$pval.adj < 0.05)
filt.p5 <- filt.DA[filt.DA$pval.adj < 0.05,]
write.csv(filt.p5,paste0("plots/differential_abundance/",SUBNAME,"/stats_",MTRL,"_",DAY,"_",FEED,"_",PDI,"_",LVL,".csv"))

if (any(filt.p5$pval.adj > 0)) {
  # Create a subset of the samples
  filt.ra <- transform_sample_counts(filt, function(x) x/sum(x)*100)
  DA.sig <- prune_taxa(filt.DA$Feature[filt.DA$pval.adj < 0.05], x = filt.ra)
  
  # melt the data
  DAm <- psmelt(DA.sig)
  
  # Create plot
  pseudocount <- min(DAm$Abundance[DAm$Abundance != 0])
  p.dif <- ggplot(DAm, aes(x = Species, y = Abundance+pseudocount, fill = fct_rev(feed_treat))) + geom_boxplot() + scale_y_log10() + coord_flip() + scale_fill_manual(values = params$COL1, name = "Treatment", labels = c("Control","PFOS"), breaks = c("HF_CTRL","HF_PFOS")) + facet_grid(Phylum ~ . , scales = "free_y", space = "free") + theme_bw()
  p.dT <- p.dif + ggtitle(paste0("Differential abundance on ",LVL," level for ",MTRL," ",DAY," ",FEED))
  print(p.dT)
  
  suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",FEED,"_",PDI,"_",LVL,".pdf"), p.dT, device = "pdf", units = "mm", width = 350, height = 200, dpi = 300))
  suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",FEED,"_",PDI,"_",LVL,".png"), p.dT, device = "png", units = "mm", width = 350, height = 200, dpi = 300))
} else {
  if (exists("FEED") == TRUE) {
  message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for feed ",FEED,"."))
  } else if (exists("TREAT") == TRUE) {
  message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for treatment ",TREAT,"."))
  }
}

# clear the environment and release memory
rm(list = ls(all.names = TRUE))
invisible(gc())

4.1.5.3 Feed in CTRL

Here we investigate the differential abundance driven by feed (No fibre vs. Fibre) in the control treatment group (CTRL).

4.1.5.3.1 SPECIES
params <- readRDS("R_objects/comp_params.RDS")

LVL <- "Species" #c("Species","Genus","Family")
MTRL <- "Feces" #c("Feces","Ileum","Cecum")
DAY <- "d21" #c("d0","d08","d12","d16","d20","d21")
#FEED <- "LF" #c("LF","HF")
TREAT <- "CTRL" #c("CTRL","PFOS")
PDI <- "feed_treat" #c("treatment","feed")
SUBNAME <- "Feces_d21" #c("Feces","Feces_LF","Feces_HF","Feces_CTRL","Feces_PFOS","Feces_LF_CTRL","Feces_LF_PFOS","Feces_HF_CTRL","Feces_HF_PFOS",             "Feces_d0","Feces_d08","Feces_d12","Feces_d16","Feces_d20","Feces_d21","Feces_d0d08","Feces_d08d21",             "Ileum","Ileum_LF","Ileum_HF","Cecum","Cecum_LF","Cecum_HF")
load(paste0("R_objects/Agglomerated_",SUBNAME,".RData"))

# Create sub-folder
  if (!file.exists(paste0("plots/differential_abundance/",SUBNAME))) dir.create(file.path(getwd(), paste0("plots/differential_abundance/",SUBNAME)))

# Filter samples (if needed)
if (exists("FEED") == TRUE) {
  phy.sp <- subset_samples(phy.sp, feed == FEED)
  print(paste0("Data contains type ",FEED," feed."))
} else {
  print("Data contains both feed LF and HF.")
}
if (exists("DAY") == TRUE) {
  phy.sp <- subset_samples(phy.sp, day %in% DAY)
  print(paste0("Data contains samples from day ",DAY,"."))
} else {
    print("Data contains all days.")
}
if (exists("TREAT") == TRUE) {
  phy.sp <- subset_samples(phy.sp, treatment == TREAT)
  print(paste0("Data contains only ",TREAT," treatment groups."))
} else {
  print("Data contains all treatment groups.")
}

# Filter data
filt <- preDA(data = phy.sp, min.reads = 20, min.samples = 4)

# Run the selected analysis
filt.DA <- DA.ttc(filt, predictor = PDI)

# Evaluate the plot and summary table
table(filt.DA$pval.adj < 0.05)
filt.p5 <- filt.DA[filt.DA$pval.adj < 0.05,]
write.csv(filt.p5,paste0("plots/differential_abundance/",SUBNAME,"/stats_",MTRL,"_",DAY,"_",TREAT,"_",PDI,"_",LVL,".csv"))

if (any(filt.p5$pval.adj > 0)) {
  # Create a subset of the samples
  filt.ra <- transform_sample_counts(filt, function(x) x/sum(x)*100)
  DA.sig <- prune_taxa(filt.DA$Feature[filt.DA$pval.adj < 0.05], x = filt.ra)
  
  # melt the data
  DAm <- psmelt(DA.sig)
  
  # Create plot
  pseudocount <- min(DAm$Abundance[DAm$Abundance != 0])
  p.dif <- ggplot(DAm, aes(x = Species, y = Abundance+pseudocount, fill = fct_rev(feed_treat))) + geom_boxplot() + scale_y_log10() + coord_flip() + scale_fill_manual(values = params$COL1, name = "Feed", labels = c("HF","LF"), breaks = c("LF_CTRL","HF_CTRL")) + facet_grid(Phylum ~ . , scales = "free_y", space = "free") + theme_bw()
  p.dT <- p.dif + ggtitle(paste0("Differential abundance on ",LVL," level for ",MTRL," ",DAY," ",TREAT))
  print(p.dT)
  
  suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",TREAT,"_",PDI,"_",LVL,".pdf"), p.dT, device = "pdf", units = "mm", width = 350, height = 200, dpi = 300))
  suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",TREAT,"_",PDI,"_",LVL,".png"), p.dT, device = "png", units = "mm", width = 350, height = 200, dpi = 300))
} else {
  if (exists("FEED") == TRUE) {
  message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for feed ",FEED,"."))
  } else if (exists("TREAT") == TRUE) {
  message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for treatment ",TREAT,"."))
  }
}

# clear the environment and release memory
rm(list = ls(all.names = TRUE))
invisible(gc())
Figure
Figure
4.1.5.3.2 GENUS
params <- readRDS("R_objects/comp_params.RDS")

LVL <- "Genus" #c("Species","Genus","Family")
MTRL <- "Feces" #c("Feces","Ileum","Cecum")
DAY <- "d21" #c("d0","d08","d12","d16","d20","d21")
#FEED <- "LF" #c("LF","HF")
TREAT <- "CTRL" #c("CTRL","PFOS")
PDI <- "feed_treat" #c("treatment","feed")
SUBNAME <- "Feces_d21" #c("Feces","Feces_LF","Feces_HF","Feces_CTRL","Feces_PFOS","Feces_LF_CTRL","Feces_LF_PFOS","Feces_HF_CTRL","Feces_HF_PFOS",             "Feces_d0","Feces_d21","Feces_d12","Feces_d16","Feces_d20","Feces_d21","Feces_d0d08","Feces_d08d21",             "Ileum","Ileum_LF","Ileum_HF","Cecum","Cecum_LF","Cecum_HF")
load(paste0("R_objects/Agglomerated_",SUBNAME,".RData"))

# Create sub-folder
  if (!file.exists(paste0("plots/differential_abundance/",SUBNAME))) dir.create(file.path(getwd(), paste0("plots/differential_abundance/",SUBNAME)))

# Filter samples (if needed)
# phy.ge <- subset_samples(phy.ge, material == MTRL)
if (exists("FEED") == TRUE) {
  phy.ge <- subset_samples(phy.ge, feed == FEED)
  print(paste0("Data contains type ",FEED," feed."))
} else {
  print("Data contains both feed LF and HF.")
}
if (exists("DAY") == TRUE) {
  phy.ge <- subset_samples(phy.ge, day %in% DAY)
  print(paste0("Data contains samples from day ",DAY,"."))
} else {
    print("Data contains all days.")
}
if (exists("TREAT") == TRUE) {
  phy.ge <- subset_samples(phy.ge, treatment == TREAT)
  print(paste0("Data contains only ",TREAT," treatment groups."))
} else {
  print("Data contains all treatment groups.")
}

# Filter data
filt <- preDA(data = phy.ge, min.reads = 20, min.samples = 4)

# Run the selected analysis
filt.DA <- DA.ttc(filt, predictor = PDI)

# Evaluate the plot and summary table
table(filt.DA$pval.adj < 0.05)
filt.p5 <- filt.DA[filt.DA$pval.adj < 0.05,]
write.csv(filt.p5,paste0("plots/differential_abundance/",SUBNAME,"/stats_",MTRL,"_",DAY,"_",TREAT,"_",PDI,"_",LVL,".csv"))

if (any(filt.p5$pval.adj > 0)) {
  # Create a subset of the samples
  filt.ra <- transform_sample_counts(filt, function(x) x/sum(x)*100)
  DA.sig <- prune_taxa(filt.DA$Feature[filt.DA$pval.adj < 0.05], x = filt.ra)
  
  # melt the data
  DAm <- psmelt(DA.sig)
  
  # Create plot
  pseudocount <- min(DAm$Abundance[DAm$Abundance != 0])
  p.dif <- ggplot(DAm, aes(x = Genus, y = Abundance+pseudocount, fill = fct_rev(feed_treat))) + geom_boxplot() + scale_y_log10() + coord_flip() + scale_fill_manual(values = params$COL1, name = "Feed", labels = c("HF","LF"), breaks = c("LF_CTRL","HF_CTRL")) + facet_grid(Phylum ~ . , scales = "free_y", space = "free") + theme_bw()
  p.dT <- p.dif + ggtitle(paste0("Differential abundance on ",LVL," level for ",MTRL," ",DAY," ",TREAT))
  print(p.dT)
  
  suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",TREAT,"_",PDI,"_",LVL,".pdf"), p.dT, device = "pdf", units = "mm", width = 350, height = 200, dpi = 300))
  suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",TREAT,"_",PDI,"_",LVL,".png"), p.dT, device = "png", units = "mm", width = 350, height = 200, dpi = 300))
} else {
  if (exists("FEED") == TRUE) {
  message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for feed ",FEED,"."))
  } else if (exists("TREAT") == TRUE) {
  message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for treatment ",TREAT,"."))
  }
}

# clear the environment and release memory
rm(list = ls(all.names = TRUE))
invisible(gc())
Figure
Figure
4.1.5.3.3 FAMILY
params <- readRDS("R_objects/comp_params.RDS")

LVL <- "Family" #c("Species","Genus","Family")
MTRL <- "Feces" #c("Feces","Ileum","Cecum")
DAY <- "d21" #c("d0","d08","d12","d16","d20","d21")
#FEED <- "LF" #c("LF","HF")
TREAT <- "CTRL" #c("CTRL","PFOS")
PDI <- "feed_treat" #c("treatment","feed")
SUBNAME <- "Feces_d21" #c("Feces","Feces_LF","Feces_HF","Feces_CTRL","Feces_PFOS","Feces_LF_CTRL","Feces_LF_PFOS","Feces_HF_CTRL","Feces_HF_PFOS",             "Feces_d0","Feces_d08","Feces_d12","Feces_d16","Feces_d20","Feces_d21","Feces_d0d08","Feces_d08d21",             "Ileum","Ileum_LF","Ileum_HF","Cecum","Cecum_LF","Cecum_HF")
load(paste0("R_objects/Agglomerated_",SUBNAME,".RData"))

# Create sub-folder
  if (!file.exists(paste0("plots/differential_abundance/",SUBNAME))) dir.create(file.path(getwd(), paste0("plots/differential_abundance/",SUBNAME)))

# Filter samples (if needed)
# phy.fa <- subset_samples(phy.fa, material == MTRL)
if (exists("FEED") == TRUE) {
  phy.fa <- subset_samples(phy.fa, feed == FEED)
  print(paste0("Data contains type ",FEED," feed."))
} else {
  print("Data contains both feed LF and HF.")
}
if (exists("DAY") == TRUE) {
  phy.fa <- subset_samples(phy.fa, day %in% DAY)
  print(paste0("Data contains samples from day ",DAY,"."))
} else {
    print("Data contains all days.")
}
if (exists("TREAT") == TRUE) {
  phy.fa <- subset_samples(phy.fa, treatment == TREAT)
  print(paste0("Data contains only ",TREAT," treatment groups."))
} else {
  print("Data contains all treatment groups.")
}

# Filter data
filt <- preDA(data = phy.fa, min.reads = 20, min.samples = 4)

# Run the selected analysis
filt.DA <- DA.ttt(filt, predictor = PDI)

# Evaluate the plot and summary table
table(filt.DA$pval.adj < 0.05)
filt.p5 <- filt.DA[filt.DA$pval.adj < 0.05,]
write.csv(filt.p5,paste0("plots/differential_abundance/",SUBNAME,"/stats_",MTRL,"_",DAY,"_",TREAT,"_",PDI,"_",LVL,".csv"))

if (any(filt.p5$pval.adj > 0)) {
  # Create a subset of the samples
  filt.ra <- transform_sample_counts(filt, function(x) x/sum(x)*100)
  DA.sig <- prune_taxa(filt.DA$Feature[filt.DA$pval.adj < 0.05], x = filt.ra)
  
  # melt the data
  DAm <- psmelt(DA.sig)
  
  # Create plot
  pseudocount <- min(DAm$Abundance[DAm$Abundance != 0])
  p.dif <- ggplot(DAm, aes(x = Family, y = Abundance+pseudocount, fill = fct_rev(feed_treat))) + geom_boxplot() + scale_y_log10() + coord_flip() + scale_fill_manual(values = params$COL1, name = "Feed", labels = c("HF","LF"), breaks = c("LF_CTRL","HF_CTRL")) + facet_grid(Phylum ~ . , scales = "free_y", space = "free") + theme_bw()
  p.dT <- p.dif + ggtitle(paste0("Differential abundance on ",LVL," level for ",MTRL," ",DAY," ",TREAT))
  print(p.dT)
  
  suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",TREAT,"_",PDI,"_",LVL,".pdf"), p.dT, device = "pdf", units = "mm", width = 350, height = 200, dpi = 300))
  suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",TREAT,"_",PDI,"_",LVL,".png"), p.dT, device = "png", units = "mm", width = 350, height = 200, dpi = 300))
} else {
  if (exists("FEED") == TRUE) {
  message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for feed ",FEED,"."))
  } else if (exists("TREAT") == TRUE) {
  message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for treatment ",TREAT,"."))
  }
}

# clear the environment and release memory
rm(list = ls(all.names = TRUE))
invisible(gc())
Figure
Figure

4.1.5.4 Feed in PFOS

Here we investigate the differential abundance driven by feed (No fibre vs. Fibre) in the PFOS treatment group (PFOS).

4.1.5.4.1 SPECIES
params <- readRDS("R_objects/comp_params.RDS")

LVL <- "Species" #c("Species","Genus","Family")
MTRL <- "Feces" #c("Feces","Ileum","Cecum")
DAY <- "d21" #c("d0","d08","d12","d16","d20","d21")
#FEED <- "LF" #c("LF","HF")
TREAT <- "PFOS" #c("CTRL","PFOS")
PDI <- "feed_treat" #c("treatment","feed")
SUBNAME <- "Feces_d21" #c("Feces","Feces_LF","Feces_HF","Feces_CTRL","Feces_PFOS","Feces_LF_CTRL","Feces_LF_PFOS","Feces_HF_CTRL","Feces_HF_PFOS",             "Feces_d0","Feces_d08","Feces_d12","Feces_d16","Feces_d20","Feces_d21","Feces_d0d08","Feces_d08d21",             "Ileum","Ileum_LF","Ileum_HF","Cecum","Cecum_LF","Cecum_HF")
load(paste0("R_objects/Agglomerated_",SUBNAME,".RData"))

# Create sub-folder
  if (!file.exists(paste0("plots/differential_abundance/",SUBNAME))) dir.create(file.path(getwd(), paste0("plots/differential_abundance/",SUBNAME)))

# Filter samples (if needed)
if (exists("FEED") == TRUE) {
  phy.sp <- subset_samples(phy.sp, feed == FEED)
  print(paste0("Data contains type ",FEED," feed."))
} else {
  print("Data contains both feed LF and HF.")
}
if (exists("DAY") == TRUE) {
  phy.sp <- subset_samples(phy.sp, day %in% DAY)
  print(paste0("Data contains samples from day ",DAY,"."))
} else {
    print("Data contains all days.")
}
if (exists("TREAT") == TRUE) {
  phy.sp <- subset_samples(phy.sp, treatment == TREAT)
  print(paste0("Data contains only ",TREAT," treatment groups."))
} else {
  print("Data contains all treatment groups.")
}

# Filter data
filt <- preDA(data = phy.sp, min.reads = 20, min.samples = 4)

# Run the selected analysis
filt.DA <- DA.ttc(filt, predictor = PDI)

# Evaluate the plot and summary table
table(filt.DA$pval.adj < 0.05)
filt.p5 <- filt.DA[filt.DA$pval.adj < 0.05,]
write.csv(filt.p5,paste0("plots/differential_abundance/",SUBNAME,"/stats_",MTRL,"_",DAY,"_",TREAT,"_",PDI,"_",LVL,".csv"))

if (any(filt.p5$pval.adj > 0)) {
  # Create a subset of the samples
  filt.ra <- transform_sample_counts(filt, function(x) x/sum(x)*100)
  DA.sig <- prune_taxa(filt.DA$Feature[filt.DA$pval.adj < 0.05], x = filt.ra)
  
  # melt the data
  DAm <- psmelt(DA.sig)
  
  # Create plot
  pseudocount <- min(DAm$Abundance[DAm$Abundance != 0])
  p.dif <- ggplot(DAm, aes(x = Species, y = Abundance+pseudocount, fill = fct_rev(feed_treat))) + geom_boxplot() + scale_y_log10() + coord_flip() + scale_fill_manual(values = params$COL1, name = "Feed", labels = c("HF","LF"), breaks = c("LF_PFOS","HF_PFOS")) + facet_grid(Phylum ~ . , scales = "free_y", space = "free") + theme_bw()
  p.dT <- p.dif + ggtitle(paste0("Differential abundance on ",LVL," level for ",MTRL," ",DAY," ",TREAT))
  print(p.dT)
  
  suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",TREAT,"_",PDI,"_",LVL,".pdf"), p.dT, device = "pdf", units = "mm", width = 350, height = 200, dpi = 300))
  suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",TREAT,"_",PDI,"_",LVL,".png"), p.dT, device = "png", units = "mm", width = 350, height = 200, dpi = 300))
} else {
  if (exists("FEED") == TRUE) {
  message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for feed ",FEED,"."))
  } else if (exists("TREAT") == TRUE) {
  message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for treatment ",TREAT,"."))
  }
}

# clear the environment and release memory
rm(list = ls(all.names = TRUE))
invisible(gc())
Figure
Figure
4.1.5.4.2 GENUS
params <- readRDS("R_objects/comp_params.RDS")

LVL <- "Genus" #c("Species","Genus","Family")
MTRL <- "Feces" #c("Feces","Ileum","Cecum")
DAY <- "d21" #c("d0","d08","d12","d16","d20","d21")
#FEED <- "LF" #c("LF","HF")
TREAT <- "PFOS" #c("CTRL","PFOS")
PDI <- "feed_treat" #c("treatment","feed")
SUBNAME <- "Feces_d21" #c("Feces","Feces_LF","Feces_HF","Feces_CTRL","Feces_PFOS","Feces_LF_CTRL","Feces_LF_PFOS","Feces_HF_CTRL","Feces_HF_PFOS",             "Feces_d0","Feces_d08","Feces_d12","Feces_d16","Feces_d20","Feces_d21","Feces_d0d08","Feces_d08d21",             "Ileum","Ileum_LF","Ileum_HF","Cecum","Cecum_LF","Cecum_HF")
load(paste0("R_objects/Agglomerated_",SUBNAME,".RData"))

# Create sub-folder
  if (!file.exists(paste0("plots/differential_abundance/",SUBNAME))) dir.create(file.path(getwd(), paste0("plots/differential_abundance/",SUBNAME)))

# Filter samples (if needed)
# phy.ge <- subset_samples(phy.ge, material == MTRL)
if (exists("FEED") == TRUE) {
  phy.ge <- subset_samples(phy.ge, feed == FEED)
  print(paste0("Data contains type ",FEED," feed."))
} else {
  print("Data contains both feed LF and HF.")
}
if (exists("DAY") == TRUE) {
  phy.ge <- subset_samples(phy.ge, day %in% DAY)
  print(paste0("Data contains samples from day ",DAY,"."))
} else {
    print("Data contains all days.")
}
if (exists("TREAT") == TRUE) {
  phy.ge <- subset_samples(phy.ge, treatment == TREAT)
  print(paste0("Data contains only ",TREAT," treatment groups."))
} else {
  print("Data contains all treatment groups.")
}

# Filter data
filt <- preDA(data = phy.ge, min.reads = 20, min.samples = 4)

# Run the selected analysis
filt.DA <- DA.ttc(filt, predictor = PDI)

# Evaluate the plot and summary table
table(filt.DA$pval.adj < 0.05)
filt.p5 <- filt.DA[filt.DA$pval.adj < 0.05,]
write.csv(filt.p5,paste0("plots/differential_abundance/",SUBNAME,"/stats_",MTRL,"_",DAY,"_",TREAT,"_",PDI,"_",LVL,".csv"))

if (any(filt.p5$pval.adj > 0)) {
  # Create a subset of the samples
  filt.ra <- transform_sample_counts(filt, function(x) x/sum(x)*100)
  DA.sig <- prune_taxa(filt.DA$Feature[filt.DA$pval.adj < 0.05], x = filt.ra)
  
  # melt the data
  DAm <- psmelt(DA.sig)
  
  # Create plot
  pseudocount <- min(DAm$Abundance[DAm$Abundance != 0])
  p.dif <- ggplot(DAm, aes(x = Genus, y = Abundance+pseudocount, fill = fct_rev(feed_treat))) + geom_boxplot() + scale_y_log10() + coord_flip() + scale_fill_manual(values = params$COL1, name = "Feed", labels = c("HF","LF"), breaks = c("LF_PFOS","HF_PFOS")) + facet_grid(Phylum ~ . , scales = "free_y", space = "free") + theme_bw()
  p.dT <- p.dif + ggtitle(paste0("Differential abundance on ",LVL," level for ",MTRL," ",DAY," ",TREAT))
  print(p.dT)
  
  suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",TREAT,"_",PDI,"_",LVL,".pdf"), p.dT, device = "pdf", units = "mm", width = 350, height = 200, dpi = 300))
  suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",TREAT,"_",PDI,"_",LVL,".png"), p.dT, device = "png", units = "mm", width = 350, height = 200, dpi = 300))
} else {
  if (exists("FEED") == TRUE) {
  message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for feed ",FEED,"."))
  } else if (exists("TREAT") == TRUE) {
  message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for treatment ",TREAT,"."))
  }
}

# clear the environment and release memory
rm(list = ls(all.names = TRUE))
invisible(gc())
Figure
Figure
4.1.5.4.3 FAMILY
params <- readRDS("R_objects/comp_params.RDS")

LVL <- "Family" #c("Species","Genus","Family")
MTRL <- "Feces" #c("Feces","Ileum","Cecum")
DAY <- "d21" #c("d0","d08","d12","d16","d20","d21")
#FEED <- "LF" #c("LF","HF")
TREAT <- "PFOS" #c("CTRL","PFOS")
PDI <- "feed_treat" #c("treatment","feed")
SUBNAME <- "Feces_d21" #c("Feces","Feces_LF","Feces_HF","Feces_CTRL","Feces_PFOS","Feces_LF_CTRL","Feces_LF_PFOS","Feces_HF_CTRL","Feces_HF_PFOS",             "Feces_d0","Feces_d08","Feces_d12","Feces_d16","Feces_d20","Feces_d21","Feces_d0d08","Feces_d08d21",             "Ileum","Ileum_LF","Ileum_HF","Cecum","Cecum_LF","Cecum_HF")
load(paste0("R_objects/Agglomerated_",SUBNAME,".RData"))

# Create sub-folder
  if (!file.exists(paste0("plots/differential_abundance/",SUBNAME))) dir.create(file.path(getwd(), paste0("plots/differential_abundance/",SUBNAME)))

# Filter samples (if needed)
# phy.fa <- subset_samples(phy.fa, material == MTRL)
if (exists("FEED") == TRUE) {
  phy.fa <- subset_samples(phy.fa, feed == FEED)
  print(paste0("Data contains type ",FEED," feed."))
} else {
  print("Data contains both feed LF and HF.")
}
if (exists("DAY") == TRUE) {
  phy.fa <- subset_samples(phy.fa, day %in% DAY)
  print(paste0("Data contains samples from day ",DAY,"."))
} else {
    print("Data contains all days.")
}
if (exists("TREAT") == TRUE) {
  phy.fa <- subset_samples(phy.fa, treatment == TREAT)
  print(paste0("Data contains only ",TREAT," treatment groups."))
} else {
  print("Data contains all treatment groups.")
}

# Filter data
filt <- preDA(data = phy.fa, min.reads = 20, min.samples = 4)

# Test best method 
filt.test <- testDA(filt, predictor = PDI, effectSize = 10)

# Evaluate the plot and summary table
sum.fil <- summary(filt.test)
write.csv(sum.fil, file = paste0("plots/differential_abundance/",SUBNAME,"/filter_summary_",MTRL,"_",DAY,"_",TREAT,"_",PDI,"_",LVL,".csv"))
p.fil <- plot(filt.test)
p.fil
pdf(paste0("plots/differential_abundance/",SUBNAME,"/filter_plot_",MTRL,"_",DAY,"_",TREAT,"_",PDI,"_",LVL,".pdf"))
    p.fil
    dev.off()

RUN DAtest

Having now identified LIMMA - CLR (lic) as the optimal test I will use it to calculate differential abundance

# Run the selected analysis
filt.DA <- DA.lic(filt, predictor = PDI)

# Evaluate the plot and summary table
table(filt.DA$pval.adj < 0.05)
filt.p5 <- filt.DA[filt.DA$pval.adj < 0.05,]
write.csv(filt.p5,paste0("plots/differential_abundance/",SUBNAME,"/stats_",MTRL,"_",DAY,"_",TREAT,"_",PDI,"_",LVL,".csv"))

if (any(filt.p5$pval.adj > 0)) {
  # Create a subset of the samples
  filt.ra <- transform_sample_counts(filt, function(x) x/sum(x)*100)
  DA.sig <- prune_taxa(filt.DA$Feature[filt.DA$pval.adj < 0.05], x = filt.ra)
  
  # melt the data
  DAm <- psmelt(DA.sig)
  
  # Create plot
  pseudocount <- min(DAm$Abundance[DAm$Abundance != 0])
  p.dif <- ggplot(DAm, aes(x = Family, y = Abundance+pseudocount, fill = fct_rev(feed_treat))) + geom_boxplot() + scale_y_log10() + coord_flip() + scale_fill_manual(values = params$COL1, name = "Feed", labels = c("HF","LF"), breaks = c("LF_PFOS","HF_PFOS")) + facet_grid(Phylum ~ . , scales = "free_y", space = "free") + theme_bw()
  p.dT <- p.dif + ggtitle(paste0("Differential abundance on ",LVL," level for ",MTRL," ",DAY," ",TREAT))
  print(p.dT)
  
  suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",TREAT,"_",PDI,"_",LVL,".pdf"), p.dT, device = "pdf", units = "mm", width = 350, height = 200, dpi = 300))
  suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",TREAT,"_",PDI,"_",LVL,".png"), p.dT, device = "png", units = "mm", width = 350, height = 200, dpi = 300))
} else {
  if (exists("FEED") == TRUE) {
  message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for feed ",FEED,"."))
  } else if (exists("TREAT") == TRUE) {
  message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for treatment ",TREAT,"."))
  }
}

# clear the environment and release memory
rm(list = ls(all.names = TRUE))
invisible(gc())
Figure
Figure

4.1.5.5 CONCLUSIONS

Significant differential abundances were observed between diets but not between treatment groups within each feed group on Day 21.

5 ILEUM & CAECUM

Here we take a look at the differential abundance driven by PFOS treatment (CTRL vs. PFOS) in LF.

5.0.0.0.1 Check for PFOS-driven DA
params <- readRDS("R_objects/comp_params.RDS")
LVL <- "Family"
LVLS <- c("Species","Genus","Family")
MTRL <- "Ileum_LF" #c("Feces","Ileum","Cecum")
PDI <- "feed_treat" #c("treatment","feed")
SUBNAME <- "Ileum_LF"
SUBNAMES <- c("Ileum_HF","Ileum_LF","Cecum_HF","Cecum_LF") #c("Feces","Feces_LF","Feces_HF","Feces_CTRL","Feces_PFOS","Feces_LF_CTRL","Feces_LF_PFOS","Feces_HF_CTRL","Feces_HF_PFOS",             "Feces_d0","Feces_d08","Feces_d12","Feces_d16","Feces_d20","Feces_d21","Feces_d0d08","Feces_d08d21",             "Ileum","Ileum_LF","Ileum_HF","Cecum","Cecum_LF","Cecum_HF")
for (SUBNAME in SUBNAMES) {
  load(paste0("R_objects/Agglomerated_",SUBNAME,".RData"))
  
  # Create sub-folder
  if (!file.exists(paste0("plots/differential_abundance/",SUBNAME))) dir.create(file.path(getwd(), paste0("plots/differential_abundance/",SUBNAME)))
  
  for (LVL in LVLS) {
    
  if (LVL == "Species") {
    phy.used <- phy.sp
  } else if (LVL == "Genus") {
    phy.used <- phy.ge
  } else if (LVL == "Family") {
    phy.used <- phy.fa
  }
  
# Filter data
filt <- preDA(data = phy.used, min.reads = 20, min.samples = 4)

# Run the selected analysis
filt.DA <- DA.kru(filt, predictor = PDI)

# Evaluate the plot and summary table
table(filt.DA$pval.adj < 0.05)
filt.p5 <- filt.DA[filt.DA$pval.adj < 0.05,]

if (any(filt.p5$pval.adj > 0)) {
  # Create a subset of the samples
  filt.ra <- transform_sample_counts(filt, function(x) x/sum(x)*100)
  DA.sig <- prune_taxa(filt.DA$Feature[filt.DA$pval.adj < 0.05], x = filt.ra)
  
  # melt the data
  DAm <- psmelt(DA.sig)
  
  # Create plot
  pseudocount <- min(DAm$Abundance[DAm$Abundance != 0])
  
  # Statistics
stat.in <- DAm %>%
  group_by(.data[[LVL]], feed, day) %>%
  wilcox_test(as.formula("Abundance ~ feed_treat")) %>%
  adjust_pvalue(method = "BH") %>%
  add_significance("p.adj") %>% 
  add_xy_position(x = "feed", dodge = 0.8) %>%
  p_format("p.adj", accuracy = 0.0001, trailing.zero = TRUE, new.col = TRUE)
stat.in$y.position <- log10(stat.in$y.position)
stat.in

stat.out <- DAm %>%
  group_by(.data[[LVL]], day) %>%
  wilcox_test(as.formula("Abundance ~ feed")) %>%
  adjust_pvalue(method = "BH") %>%
  add_significance("p.adj") %>% 
  add_xy_position(x = "feed", dodge = 0.8) %>%
  p_format("p.adj", accuracy = 0.0001, trailing.zero = TRUE, new.col = TRUE)
stat.out$y.position <- log10(stat.out$y.position)
stat.out
  
  p <- ggplot(DAm, aes(x = feed, y = Abundance+pseudocount)) +
    geom_boxplot(aes(fill = fct_rev(feed_treat))) + 
    scale_y_log10() + 
#    coord_flip() + 
    scale_fill_manual(values = params$COL1, name = "Treatment", labels = c("HF_CTRL" ="HF-CTRL","HF_PFOS" = "HF-PFOS","LF_CTRL" = "LF-CTRL","LF_PFOS" = "LF_PFOS"), breaks = c("HF_CTRL","HF_PFOS","LF_CTRL","LF_PFOS")) + 
    facet_grid(day ~ .data[[LVL]] , scales = "free_y", space = "free", labeller = labeller(day = c("d08" = "Day 8","d21" = "Day 21"))) + 
    theme_pubr(border = TRUE)
  p
  
  p.stat <- p + stat_pvalue_manual(stat.in, label = "p.adj.signif", tip.length = 0, hide.ns = TRUE, color = "red") +
  stat_pvalue_manual(stat.out, label = "p.adj.signif", tip.length = 0, hide.ns = TRUE)
  p.stat
  
  suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",PDI,"_",LVL,".pdf"), p.stat, device = "pdf", units = "mm", width = 350, height = 200, dpi = 300))
  suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",PDI,"_",LVL,".png"), p.stat, device = "png", units = "mm", width = 350, height = 200, dpi = 300))
  } else {
    print(paste0("No significant results when testing ",PDI," in ",SUBNAME," at ",LVL,"."))
  }

  }
}
## 91 features grouped as 'Others' in the output
## [1] "No significant results when testing feed_treat in Ileum_HF at Species."
## 65 features grouped as 'Others' in the output
## [1] "No significant results when testing feed_treat in Ileum_HF at Genus."
## 25 features grouped as 'Others' in the output
## [1] "No significant results when testing feed_treat in Ileum_HF at Family."
## 105 features grouped as 'Others' in the output
## [1] "No significant results when testing feed_treat in Ileum_LF at Species."
## 76 features grouped as 'Others' in the output
## [1] "No significant results when testing feed_treat in Ileum_LF at Genus."
## 27 features grouped as 'Others' in the output
## [1] "No significant results when testing feed_treat in Ileum_LF at Family."
## 59 features grouped as 'Others' in the output
## [1] "No significant results when testing feed_treat in Cecum_HF at Species."
## 39 features grouped as 'Others' in the output
## [1] "No significant results when testing feed_treat in Cecum_HF at Genus."
## 19 features grouped as 'Others' in the output
## [1] "No significant results when testing feed_treat in Cecum_HF at Family."
## 57 features grouped as 'Others' in the output
## [1] "No significant results when testing feed_treat in Cecum_LF at Species."
## 41 features grouped as 'Others' in the output
## [1] "No significant results when testing feed_treat in Cecum_LF at Genus."
## 19 features grouped as 'Others' in the output
## [1] "No significant results when testing feed_treat in Cecum_LF at Family."
# clear the environment and release memory
rm(list = ls(all.names = TRUE))
invisible(gc())

5.1 Conclusion

No significant differences observed in caecum and ileum.

6 ENTEROBACTERIACEAE > GENERA

6.1 ABUNDANCE + PSEUDOCOUNT (Figure 7)

Significant difference in abundance was found for unclassified genera “Family_Enterobacteriaceae” above. Here we investigate and plot all genera present on day 8 in the family “Enterobacteriaceae”.

# Collected figures for Families showing significant differences
params <- readRDS("R_objects/comp_params.RDS")
LVL <- "Genus"
LVLS <- c("Genus","Family")
SUBNAME <- "Feces"
PDI <- "feed_treat"
TX <- "Enterobacteriaceae" 

# Create sub-folder
if (!file.exists(paste0("plots/differential_abundance/",SUBNAME))) dir.create(file.path(getwd(), paste0("plots/differential_abundance/",SUBNAME)))

# Load agglomorated subset
load(paste0("R_objects/Agglomerated_",SUBNAME,".RData"))

for (LVL in LVLS) {
  # Filter data
  if (LVL == "Genus") {
    filt <- preDA(data = phy.ge, min.reads = 20, min.samples = 4)
  } else if (LVL == "Family") {
    filt <- preDA(data = phy.fa, min.reads = 20, min.samples = 4)
  }

  # Run tha selected analysis Wilcoxon
  filt.DA <- DA.wil(filt, predictor = "feed")
  
  # Create a subset of the samples
  filt.ra <- transform_sample_counts(filt, function(x) x/sum(x)*100)
  filt.t <- subset_taxa(filt.ra, Family == TX)
  
  # Melt the data
  t.melt <- psmelt(filt.t)
  
  # Create pseudocounts
  pseudocount <- min(t.melt$Abundance[t.melt$Abundance != 0])
  
  # Sort samples to phylogenetic levels (alphabetically)
  if (LVL == "Genus") {
    t.sorted <- t.melt %>% select(Phylum:Genus) %>% arrange(Phylum, Class, Order, Family, Genus)
    genus.sorted <- as.character(unique(t.sorted$Genus))
    t.melt$genus_new <- factor(t.melt$Genus, levels = genus.sorted)
    
    levels(t.melt$genus_new) %>% head
  } else if (LVL == "Family") {
    t.sorted <- t.melt %>% select(Phylum:Family) %>% arrange(Phylum, Class, Order, Family)
    fam.sorted <- as.character(unique(t.sorted$Family))
    t.melt$fam_new <- factor(t.melt$Family, levels = fam.sorted)
    
    levels(t.melt$genus_new) %>% head
  }

  # Statistics
  stat.in <- t.melt %>%
    group_by(.data[[LVL]], feed, day) %>%
    wilcox_test(as.formula("Abundance ~ feed_treat")) %>%
    adjust_pvalue(method = "BH") %>%
    add_significance("p.adj") %>% 
    add_xy_position(x = "feed", dodge = 0.8) %>%
    p_format("p.adj", accuracy = 0.0001, trailing.zero = TRUE, new.col = TRUE)
  stat.in$y.position <- log10(stat.in$y.position)
  stat.in
  
  stat.out <- t.melt %>%
    group_by(.data[[LVL]], day) %>%
    wilcox_test(as.formula("Abundance ~ feed")) %>%
    adjust_pvalue(method = "BH") %>%
    add_significance("p.adj") %>% 
    add_xy_position(x = "feed", dodge = 0.8) %>%
    p_format("p.adj", accuracy = 0.0001, trailing.zero = TRUE, new.col = TRUE)
  stat.out$y.position <- log10(stat.out$y.position)
  stat.out
  
  # Plot data and save as output formats
  p <- ggplot(t.melt, aes(feed, Abundance+pseudocount)) +
    geom_boxplot(aes(fill = feed_treat), color = "black") +
    scale_y_log10(name = "Abundance+pseudocount (log10)", expand = expansion(mult = c(0, 0.075)), labels =function(x) paste0(x, "%")) +
    scale_fill_manual(name = "Group", values = params$COL1,
                      breaks = c("HF_CTRL","HF_PFOS","LF_CTRL","LF_PFOS"),
                      labels = c("HF-CTRL","HF-PFOS","LF-CTRL","LF-PFOS")) +
    facet_grid(.data[[LVL]] ~ day, space = "free", 
               labeller = labeller(day = c("d0" = "Day 0","d08" = "Day 8","d12" = "Day 12","d16" = "Day 16","d21" = "Day 21"))) +
    theme_pubr(border = TRUE) +
    guides(color = "none") +
    theme(axis.title.x = element_blank())
  p
  
  if (SUBNAME == "Feces") {
      if (LVL == "Genus") {
        p.stat <- p + stat_pvalue_manual(stat.in, label = "p.adj.signif", tip.length = 0, hide.ns = TRUE, color = "red") +
    stat_pvalue_manual(stat.out, label = "p.adj.signif", tip.length = 0, hide.ns = TRUE, y.position = c(1.1,1.4,1.2,1.3,1.3,0.01,0.01))
      } else {
        p.stat <- p + stat_pvalue_manual(stat.in, label = "p.adj.signif", tip.length = 0, hide.ns = TRUE, color = "red") +
    stat_pvalue_manual(stat.out, label = "p.adj.signif", tip.length = 0, hide.ns = TRUE)
      }
  } else if (SUBNAME %in% c("Ileum","Cecum")) {
    p.stat <- p + #stat_pvalue_manual(stat.in, label = "p.adj.signif", tip.length = 0, hide.ns = TRUE, color = "red") +
    stat_pvalue_manual(stat.out, label = "p.adj.signif", tip.length = 0, hide.ns = TRUE)
  }
  print(p.stat)
  
  suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",LVL,"_",TX,".pdf"), p.stat, device = "pdf", units = "mm", width = 200, height = 100, dpi = 300))
  suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",LVL,"_",TX,".png"), p.stat, device = "png", units = "mm", width = 200, height = 100, dpi = 300))
  
  save(stat.in, stat.out, p.stat, file = paste0("plots/differential_abundance/",SUBNAME,"/plot_stat_",SUBNAME,"_",LVL,".Rdata"))
}
# clear the environment and release memory
rm(list = ls(all.names = TRUE))
invisible(gc())

Figure Figure

7 SETTINGS

Overview of the packages that was used for this analysis

7.1 PACKAGES

The analysis was run in R version getRversion() using the following packages:

pack <- data.frame(Package = (.packages()))

for (i in seq(nrow(pack))) pack$Version[i] <- as.character(packageVersion(pack$Package[i]))

kbl(pack[order(pack$Package),], row.names = F) %>% kable_classic(lightable_options = "striped")   
Package Version
ape 5.8
base 4.3.1
datasets 4.3.1
DAtest 2.8.0
decontam 1.22.0
dplyr 1.1.4
forcats 1.0.0
ggplot2 3.5.1
ggpubr 0.6.0
graphics 4.3.1
grDevices 4.3.1
kableExtra 1.4.0
lattice 0.21.8
lubridate 1.9.3
magick 2.8.4
methods 4.3.1
pals 1.9
permute 0.9.7
phyloseq 1.46.0
plotly 4.10.4
purrr 1.0.2
readr 2.1.5
reshape2 1.4.4
rstatix 0.7.2
stats 4.3.1
stringr 1.5.1
tibble 3.2.1
tidyr 1.3.1
tidyverse 2.0.0
utils 4.3.1
vegan 2.6.6.1

7.2 PARAMETERS

params <- readRDS("R_objects/comp_params.RDS")

tmp <- unlist(params)
dat <- data.frame(Parameter = names(tmp), Value = unname(tmp))


kbl(dat, row.names = F) %>% kable_classic(lightable_options = "striped")
Parameter Value
input R_objects/Phyloseq.Rdata
meta input/metadata_fibrex.csv
batch Run
indeces Observed|Shannon|FaithPD|Chao1
COL0.CTRL #eeeeee
COL0.PFOS #666666
COL1.LF_CTRL #a5cee3
COL1.LF_PFOS #1778b6
COL1.HF_CTRL #b4d88a
COL1.HF_PFOS #30a148
COL2.LF_CTRL_d8 #a5cee3
COL2.LF_CTRL_d21 #a5cee3
COL2.LF_PFOS_d8 #1778b6
COL2.LF_PFOS_d21 #1778b6
COL2.HF_CTRL_d8 #b4d88a
COL2.HF_CTRL_d21 #b4d88a
COL2.HF_PFOS_d8 #30a148
COL2.HF_PFOS_d21 #30a148
COLFEED.LF #1778b6
COLFEED.HF #30a148